Exemple #1
0
    def testObjectArray(self):
        """Tests de-/serialization of object np.arrays.

        Builds a 50x50 random array with ``dtype=object``, serializes it to a
        temporary file and deserializes it again. The test only checks that
        both calls complete without raising; the deserialized value is not
        compared with the original array.
        """
        arr = np.array(np.random.random((50, 50)), dtype=object)

        # The context manager closes the temporary file deterministically,
        # even when (de)serialization raises, instead of leaving the handle
        # open until garbage collection.
        with tempfile.NamedTemporaryFile() as tmp_file:
            serialize_to_file(arr, tmp_file.name)
            deserialize_from_file(tmp_file.name)
Exemple #2
0
def getByID(id):
    """
    Dummy storage API: retrieve a previously stored object from its id.

    The object is deserialized from the file named ``<id>.PSCO`` located
    under ``storage_path`` and its identifier is set through ``setID``
    before it is returned.

    @param id: identifier of the object to be retrieved.
    @return: the deserialized object.
    @raise ValueError: if id is None, or if a ValueError is raised while
                       the object is being loaded.
    """
    # NOTE: this is the dummy storage API; check that you really want to
    # use it.
    if id is None:
        # Using a None id --> raise an exception
        print("Error: the ID for getByID is None.")
        raise ValueError('Using the dummy storage API getByID with None id.')

    try:
        psco_name = id + '.PSCO'
        psco_path = storage_path + psco_name
        loaded = deserialize_from_file(psco_path)
        loaded.setID(id)
    except ValueError:
        # The id does not comply with the uuid4 format --> raise an exception
        print(
            "Error: the ID for getByID does not complain the uuid4 format."
        )
        raise ValueError(
            'Using the dummy storage API getByID with wrong id.')
    return loaded
Exemple #3
0
def test_serialize_deserialize_obj_to_file():
    """Check that a plain list survives a serialize/deserialize round trip.

    The list is written to ``target.pkl`` in the current working directory,
    read back and compared with the original. The file is removed in all
    cases, including when serialization or deserialization raises.
    """
    # Uses serialize to handler underneath.
    from pycompss.util.serialization.serializer import serialize_to_file
    from pycompss.util.serialization.serializer import deserialize_from_file
    target_file = "target.pkl"
    obj = [1, 3, 2, "hello", "world"]
    try:
        serialize_to_file(obj, target_file)
        result = deserialize_from_file(target_file)
    finally:
        # Do not leave the artifact behind when the round trip fails.
        if os.path.exists(target_file):
            os.remove(target_file)
    assert obj == result, "ERROR: Object serialization and deserialization retrieved wrong object."  # noqa: E501
Exemple #4
0
def test_serialize_deserialize_obj_to_file_no_gc():
    """Round-trip a plain list through a file with ``DISABLE_GC`` set.

    ``serializer.DISABLE_GC`` is switched to True only for the duration of
    the round trip and restored afterwards, so the flag does not leak into
    tests that run later in the same process. The ``target.pkl`` artifact is
    removed even when the round trip raises.
    """
    # Uses serialize to handler underneath.
    import pycompss.util.serialization.serializer as serializer
    from pycompss.util.serialization.serializer import serialize_to_file
    from pycompss.util.serialization.serializer import deserialize_from_file
    # Remember the current value so that it can be restored afterwards.
    previous_disable_gc = getattr(serializer, "DISABLE_GC", False)
    serializer.DISABLE_GC = True
    target_file = "target.pkl"
    obj = [1, 3, 2, "hello", "world"]
    try:
        serialize_to_file(obj, target_file)
        result = deserialize_from_file(target_file)
    finally:
        serializer.DISABLE_GC = previous_disable_gc
        if os.path.exists(target_file):
            os.remove(target_file)
    assert obj == result, "ERROR: Object serialization and deserialization (without garbage collector) retrieved wrong object."  # noqa: E501
Exemple #5
0
def test_serialize_deserialize_np_to_file():
    """Check that a numpy array survives a serialize/deserialize round trip.

    A random 4x4 array is written to ``target_np.pkl`` in the current
    working directory, read back and compared with ``np.array_equal``. The
    file is removed in all cases, including when the round trip raises.
    """
    # Uses serialize to handler underneath.
    from pycompss.util.serialization.serializer import serialize_to_file
    from pycompss.util.serialization.serializer import deserialize_from_file
    target_file_np = "target_np.pkl"
    obj_np = np.random.rand(4, 4)
    try:
        serialize_to_file(obj_np, target_file_np)
        result_np = deserialize_from_file(target_file_np)
    finally:
        # Do not leave the artifact behind when the round trip fails.
        if os.path.exists(target_file_np):
            os.remove(target_file_np)
    assert np.array_equal(
        obj_np, result_np
    ), "ERROR: Numpy object serialization and deserialization retrieved wrong object."  # noqa: E501
Exemple #6
0
def test_serialize_deserialize_np_to_file_no_gc():
    """Round-trip a numpy array through a file with ``DISABLE_GC`` set.

    ``serializer.DISABLE_GC`` is switched to True only for the duration of
    the round trip and restored afterwards, so the flag does not leak into
    tests that run later in the same process. The ``target_np.pkl`` artifact
    is removed even when the round trip raises.
    """
    # Uses serialize to handler underneath.
    import pycompss.util.serialization.serializer as serializer
    from pycompss.util.serialization.serializer import serialize_to_file
    from pycompss.util.serialization.serializer import deserialize_from_file
    # Remember the current value so that it can be restored afterwards.
    previous_disable_gc = getattr(serializer, "DISABLE_GC", False)
    serializer.DISABLE_GC = True
    target_file_np = "target_np.pkl"
    obj_np = np.random.rand(4, 4)
    try:
        serialize_to_file(obj_np, target_file_np)
        result_np = deserialize_from_file(target_file_np)
    finally:
        serializer.DISABLE_GC = previous_disable_gc
        if os.path.exists(target_file_np):
            os.remove(target_file_np)
    assert np.array_equal(
        obj_np, result_np
    ), "ERROR: Numpy object serialization and deserialization (without garbage collector) retrieved wrong object."  # noqa: E501
Exemple #7
0
def test_serialize_objects():
    """Check ``serialize_objects`` with a list, a dict and a numpy array.

    Each entry handed to ``serialize_objects`` is an ``(object, file name)``
    pair. Every file is then read back with ``deserialize_from_file`` and
    compared with the object it was created from. The three files are
    removed in all cases, including when (de)serialization raises.
    """
    from pycompss.util.serialization.serializer import serialize_objects
    from pycompss.util.serialization.serializer import deserialize_from_file
    obj1 = ([1, 2, 3, 4], "obj1.pkl")
    obj2 = ({"hello": "mars", "goodbye": "world"}, "obj2.pkl")
    obj3 = (np.random.rand(3, 3), "obj3.pkl")
    objects = [obj1, obj2, obj3]
    try:
        serialize_objects(objects)
        result = [deserialize_from_file(file_name)
                  for _, file_name in objects]
    finally:
        # Do not leave artifacts behind when the round trip fails midway.
        for _, file_name in objects:
            if os.path.exists(file_name):
                os.remove(file_name)
    assert len(result) == len(
        objects), "ERROR: Wrong number of objects retrieved."  # noqa: E501
    assert result[0] == obj1[0], "ERROR: Wrong first object."
    assert result[1] == obj2[0], "ERROR: Wrong second object."
    assert np.array_equal(result[2], obj3[0]), "ERROR: Wrong third object."
Exemple #8
0
def get_by_id(id):  # noqa
    # type: (str) -> typing.Any
    """ Dummy storage API: retrieve a previously stored object from its id.

    The object is deserialized from the file named ``<id>.PSCO`` located
    under ``STORAGE_PATH`` and its identifier is set through ``setID``
    before it is returned.

    :param id: key of the object to be retrieved.
    :return: the deserialized object.
    :raises ValueError: if id is None, or if a ValueError is raised while
                        the object is being loaded.
    """
    # NOTE: this is the dummy storage API; check that you really want to
    # use it.
    if id is None:
        # Using a None id --> raise an exception
        print("Error: the ID for get_by_id is None.")
        raise ValueError('Using the dummy storage API get_by_id with None id.')

    try:
        psco_name = id + '.PSCO'
        psco_path = STORAGE_PATH + psco_name
        loaded = deserialize_from_file(psco_path)
        loaded.setID(id)  # noqa
    except ValueError:
        # The id does not comply with the uuid4 format --> raise an exception
        print(
            "Error: the ID for get_by_id does not complain the uuid4 format."
        )  # noqa: E501
        raise ValueError(
            'Using the dummy storage API get_by_id with wrong id.'
        )  # noqa: E501
    return loaded
Exemple #9
0
def evaluate_worker(worker, name, pipes, files, current_path, executor_out,
                    executor_in, worker_out, worker_in):
    """Drive a worker through two tasks and validate what it produced.

    Starts ``worker``, sends a "simple" and an "increment" task request
    through ``executor_out``, sends QUIT through both outbound descriptors,
    waits for the worker to finish and then checks the worker logs, the job
    files and the increment result. All produced files are removed on
    success.

    :param worker: Object exposing ``start()`` and ``join()``; it is started
                   here.
    :param name: Worker name, only used in the progress message.
    :param pipes: Paths of the pipes to unlink once the worker has finished.
    :param files: Five paths: temp folder, executor outbound, executor
                  inbound, control worker outbound and control worker
                  inbound.
    :param current_path: Prefix prepended to STD_OUT_FILE and STD_ERR_FILE
                         to locate the files to remove.
    :param executor_out: File descriptor where the task requests are written.
    :param executor_in: File descriptor; it is only closed here.
    :param worker_out: File descriptor where the worker QUIT is written.
    :param worker_in: File descriptor; it is only closed here.
    :return: None
    :raises PyCOMPSsException: If the worker error log exists, if the worker
                               output log contains "ERROR" or "Traceback",
                               or if the increment result is not 2.
    """
    temp_folder, executor_outbound, executor_inbound, control_worker_outbound, control_worker_inbound = files  # noqa: E501
    print("Starting " + name + " worker")
    worker.start()
    print("Temp folder: " + temp_folder)
    # Wait 2 seconds to start the worker.
    print("Waiting 2 seconds to send a task request")
    time.sleep(2)
    # Run a simple task
    job1_out = tempfile.NamedTemporaryFile(delete=False).name
    job1_err = tempfile.NamedTemporaryFile(delete=False).name
    simple_task_message = [
        "EXECUTE_TASK",
        "1",
        job1_out,
        job1_err,
        "0",
        "1",
        "true",
        "null",
        "METHOD",
        "common_piper_tester",
        "simple",
        "0",
        "1",
        "localhost",
        "1",
        "false",
        "None",
        "0",
        "0",
        "-",
        "0",
        "0",
    ]
    simple_task_message_str = " ".join(simple_task_message)
    print("Requesting: " + simple_task_message_str)
    if IS_PYTHON3:
        os.write(executor_out,
                 (simple_task_message_str + "\n").encode())  # noqa
    else:
        os.write(executor_out, simple_task_message_str + "\n")  # noqa
    time.sleep(2)
    # Run a increment task
    job2_out = tempfile.NamedTemporaryFile(delete=False).name
    job2_err = tempfile.NamedTemporaryFile(delete=False).name
    job2_result = tempfile.NamedTemporaryFile(delete=False).name
    increment_task_message = [
        "EXECUTE_TASK",
        "2",
        job2_out,
        job2_err,
        "0",
        "1",
        "true",
        "null",
        "METHOD",
        "common_piper_tester",
        "increment",
        "0",
        "1",
        "localhost",
        "1",
        "false",
        "9",
        "1",
        "2",
        "4",
        "3",
        "null",
        "value",
        "null",
        "1",
        "9",
        "3",
        "#",
        "$return_0",
        "null",
        job2_result + ":d1v2_1599560599402.IT:false:true:" + job2_result,
        "-",
        "0",
        "0",
    ]
    increment_task_message_str = " ".join(increment_task_message)
    print("Requesting: " + increment_task_message_str)
    if IS_PYTHON3:
        os.write(executor_out,
                 (increment_task_message_str + "\n").encode())  # noqa
    else:
        os.write(executor_out, increment_task_message_str + "\n")  # noqa
    time.sleep(2)
    # Send quit message
    os.write(executor_out, b"QUIT\n")
    os.write(worker_out, b"QUIT\n")
    # Wait for the worker to finish
    worker.join()
    # Cleanup
    # Close pipes
    os.close(executor_out)
    os.close(executor_in)
    os.close(worker_out)
    os.close(worker_in)
    # Remove pipes
    for pipe in pipes:
        os.unlink(pipe)
        if os.path.isfile(pipe):
            os.remove(pipe)
    # Check logs
    out_log = os.path.join(temp_folder, "binding_worker.out")
    err_log = os.path.join(temp_folder, "binding_worker.err")
    if os.path.exists(err_log):
        raise PyCOMPSsException(ERROR_MESSAGE + err_log)
    with open(out_log, "r") as f:
        # Read the log only once: a second f.read() on the same handle
        # returns an empty string, so a "Traceback" check performed on it
        # could never match.
        out_log_content = f.read()
    if "ERROR" in out_log_content or "Traceback" in out_log_content:
        raise PyCOMPSsException(ERROR_MESSAGE + out_log)
    # Check task 1
    check_task(job1_out, job1_err)
    # Check task 2
    check_task(job2_out, job2_err)
    result = deserialize_from_file(job2_result)
    if result != 2:
        raise PyCOMPSsException(
            "Wrong result obtained for increment task. Expected 2, received: "
            +  # noqa: E501
            str(result))

    # Remove logs
    for job_file in (job1_out, job1_err, job2_out, job2_err, job2_result):
        os.remove(job_file)
    for optional_file in (err_log, out_log,
                          current_path + STD_OUT_FILE,
                          current_path + STD_ERR_FILE):
        if os.path.isfile(optional_file):
            os.remove(optional_file)
    shutil.rmtree(temp_folder)
    for pipe_file in (executor_outbound, executor_inbound,
                      control_worker_outbound, control_worker_inbound):
        if os.path.isfile(pipe_file):
            os.remove(pipe_file)
Exemple #10
0
def test_piper_worker():
    """Run the piper worker in a separate process and send it two tasks.

    The test mimics the runtime call by overriding ``sys.argv``, creates and
    opens the four named pipes listed at the end of the arguments, starts
    the worker process, requests a "simple" and an "increment" task, sends
    QUIT and finally checks the logs, the job files and the increment
    result. ``sys.argv`` and ``sys.path`` are restored even when the test
    fails.

    :raises Exception: If the worker error log exists, if the worker output
                       log contains "ERROR" or "Traceback", or if the
                       increment result is not 2.
    """
    # Override sys.argv to mimic runtime call
    sys_argv_backup = list(sys.argv)
    sys_path_backup = list(sys.path)

    try:
        sys.argv = [
            'piper_worker.py',
            '/tmp/',
            'false',
            'true',
            0,
            'null',
            'NONE',
            'localhost',
            '49049',
            '1',
            '/tmp/pipe_-504901196_executor0.outbound',
            '/tmp/pipe_-504901196_executor0.inbound',
            '/tmp/pipe_-504901196_control_worker.outbound',  # noqa: E501
            '/tmp/pipe_-504901196_control_worker.inbound'
        ]  # noqa: E501
        pipes = sys.argv[-4:]
        # Create pipes
        for pipe in pipes:
            if os.path.exists(pipe):
                os.remove(pipe)
            os.mkfifo(pipe)
        # Open pipes
        executor_out = os.open(pipes[0], os.O_RDWR)
        executor_in = os.open(pipes[1], os.O_RDWR)
        worker_out = os.open(pipes[2], os.O_RDWR)
        worker_in = os.open(pipes[3], os.O_RDWR)

        current_path = os.path.dirname(os.path.abspath(__file__))
        sys.path.append(current_path)
        # Start the piper worker in a separate process
        worker = multiprocessing.Process(target=worker_thread,
                                         args=(sys.argv, current_path))
        print("Starting piper worker")
        worker.start()
        # Wait 2 seconds to start the worker.
        print("Waiting 2 seconds to send a task request")
        time.sleep(2)
        # Run a simple task
        job1_out = '/tmp/job1_NEW.out'
        job1_err = '/tmp/job1_NEW.err'
        simple_task_message = [
            'EXECUTE_TASK', '1', job1_out, job1_err, '0', '1', 'true', 'null',
            'METHOD', 'test_piper', 'simple', '0', '1', 'localhost', '1',
            'false', 'None', '0', '0', '-', '0', '0'
        ]
        simple_task_message_str = " ".join(simple_task_message)
        print("Requesting: " + simple_task_message_str)
        # os.write needs bytes on Python 3; encoding an ASCII str is also
        # valid on Python 2.
        os.write(executor_out,
                 (simple_task_message_str + '\n').encode())  # noqa
        time.sleep(2)
        # Run a increment task
        job2_out = '/tmp/job2_NEW.out'
        job2_err = '/tmp/job2_NEW.err'
        job2_result = '/tmp/job2.IT'
        increment_task_message = [
            'EXECUTE_TASK', '2', job2_out, job2_err, '0', '1', 'true', 'null',
            'METHOD', 'test_piper', 'increment', '0', '1', 'localhost', '1',
            'false', '9', '1', '2', '4', '3', 'null', 'value', 'null', '1',
            '9', '3', '#', '$return_0', 'null',
            job2_result + ':d1v2_1599560599402.IT:false:true:' + job2_result,
            '-', '0', '0'
        ]
        increment_task_message_str = " ".join(increment_task_message)
        print("Requesting: " + increment_task_message_str)
        os.write(executor_out,
                 (increment_task_message_str + '\n').encode())  # noqa
        time.sleep(2)
        # Send quit message
        os.write(executor_out, b"QUIT\n")
        os.write(worker_out, b"QUIT\n")
        # Wait for the worker to finish
        worker.join()
        # Cleanup
        # Close pipes
        os.close(executor_out)
        os.close(executor_in)
        os.close(worker_out)
        os.close(worker_in)
        # Remove pipes
        for pipe in pipes:
            os.unlink(pipe)
            if os.path.isfile(pipe):
                os.remove(pipe)
        # Check logs
        out_log = "log/binding_worker.out"
        err_log = "log/binding_worker.err"
        if os.path.exists(err_log):
            raise Exception("An error happened. Please check " + err_log)
        with open(out_log, 'r') as f:
            # Read the log only once: a second f.read() on the same handle
            # returns an empty string, so a 'Traceback' check performed on
            # it could never match.
            out_log_content = f.read()
        if 'ERROR' in out_log_content or 'Traceback' in out_log_content:
            raise Exception("An error happened. Please check " + out_log)
        # Check task 1
        check_task(job1_out, job1_err)
        # Check task 2
        check_task(job2_out, job2_err)
        result = deserialize_from_file(job2_result)
        if result != 2:
            raise Exception(
                "Wrong result obtained for increment task. Expected 2, received: "  # noqa: E501
                + str(result))  # noqa

        # Remove logs
        os.remove(job1_out)
        os.remove(job1_err)
        os.remove(job2_out)
        os.remove(job2_err)
        os.remove(job2_result)
        if os.path.isfile(err_log):
            os.remove(err_log)
        if os.path.isfile(out_log):
            os.remove(out_log)
        if os.path.isfile(current_path + "/../../../../std.out"):
            os.remove(current_path + "/../../../../std.out")
        if os.path.isfile(current_path + "/../../../../std.err"):
            os.remove(current_path + "/../../../../std.err")
    finally:
        # Restore sys.argv and sys.path, also when the test fails, so that
        # the overridden values do not leak into other tests.
        sys.argv = sys_argv_backup
        sys.path = sys_path_backup
Exemple #11
0
def execute_task(process_name,
                 storage_conf,
                 params,
                 tracing,
                 logger,
                 python_mpi=False):
    """
    ExecuteTask main method.

    Parses the task description held in params, imports the user module
    (or, when that import fails, the class that declares the method) and
    runs the task through task_execution. For instance methods, self is
    loaded before the call and serialized back to its file afterwards when
    the target direction is INOUT or COMMUTATIVE and self is not a PSCO.

    :param process_name: Process name
    :param storage_conf: Storage configuration file path ('null' means that
                         persistent storage is not used)
    :param params: List of parameters. As read here: [0] module (or class)
                   path, [1] method name, [2] time out, [3] number of
                   slaves, followed by the slaves, the computing units, the
                   has target flag, the return type, the return length, the
                   number of parameters and the parameters themselves
    :param tracing: Tracing flag
    :param logger: Logger to use
    :param python_mpi: If it is a MPI task
    :return: exit code, new types, new values, timed out flag and exception
             message
    """
    if __debug__:
        logger.debug("Begin task execution in %s" % process_name)

    persistent_storage = False
    if storage_conf != 'null':
        persistent_storage = True

    # Retrieve the parameters from the params argument
    path = params[0]
    method_name = params[1]
    num_slaves = int(params[3])
    time_out = int(params[2])
    # The slaves follow the slave count (params[3]) and the task arguments
    # start at 4 + num_slaves, so the slaves live in [4, 4 + num_slaves).
    slaves = list(params[4:4 + num_slaves])
    arg_position = 4 + num_slaves

    args = params[arg_position:]
    cus = args[0]
    args = args[1:]
    has_target = args[0]
    return_type = args[1]
    return_length = int(args[2])
    num_params = int(args[3])

    args = args[4:]

    # COMPSs keywords for tasks (ie: tracing, process name...)
    # compss_key is included to be checked in the @task decorator, so that
    # the task knows if it has been called from the worker or from the
    # user code (reason: ignore @task decorator if called from another task).
    compss_kwargs = {
        'compss_key': True,
        'compss_tracing': tracing,
        'compss_process_name': process_name,
        'compss_storage_conf': storage_conf,
        'compss_return_length': return_length,
        'python_MPI': python_mpi
    }

    if __debug__:
        logger.debug("Storage conf: %s" % str(storage_conf))
        logger.debug("Params: %s" % str(params))
        logger.debug("Path: %s" % str(path))
        logger.debug("Method name: %s" % str(method_name))
        logger.debug("Num slaves: %s" % str(num_slaves))
        logger.debug("Slaves: %s" % str(slaves))
        logger.debug("Cus: %s" % str(cus))
        logger.debug("Has target: %s" % str(has_target))
        logger.debug("Num Params: %s" % str(num_params))
        logger.debug("Return Length: %s" % str(return_length))
        logger.debug("Args: %r" % args)

    # Get all parameter values
    if __debug__:
        logger.debug("Processing parameters:")
    values = get_input_params(num_params, logger, args)
    types = [x.type for x in values]

    if __debug__:
        logger.debug("RUN TASK with arguments:")
        logger.debug("\t- Path: %s" % path)
        logger.debug("\t- Method/function name: %s" % method_name)
        logger.debug("\t- Has target: %s" % str(has_target))
        logger.debug("\t- # parameters: %s" % str(num_params))
        logger.debug("\t- Values:")
        for v in values:
            logger.debug("\t\t %r" % v)
        logger.debug("\t- COMPSs types:")
        for t in types:
            logger.debug("\t\t %s" % str(t))

    import_error = False

    new_types = []
    new_values = []
    timed_out = False

    try:
        # Try to import the module (for functions)
        if __debug__:
            logger.debug("Trying to import the user module: %s" % path)
        py_version = sys.version_info
        if py_version >= (2, 7):
            import importlib
            module = importlib.import_module(path)  # Python 2.7
            if path.startswith('InteractiveMode_'):
                # Force reload in interactive mode. The user may have
                # overwritten a function or task.
                if py_version < (3, 0):
                    reload(module)  # noqa
                elif py_version < (3, 4):
                    import imp
                    imp.reload(module)
                else:
                    importlib.reload(module)
            if __debug__:
                msg = "Module successfully loaded (Python version >= 2.7)"
                logger.debug(msg)
        else:
            module = __import__(path, globals(), locals(), [path], -1)
            if __debug__:
                msg = "Module successfully loaded (Python version < 2.7"
                logger.debug(msg)
    except ImportError:
        if __debug__:
            msg = "Could not import the module. Reason: Method in class."
            logger.debug(msg)
        import_error = True

    if not import_error:
        # Module method declared as task
        result = task_execution(logger, process_name, module, method_name,
                                time_out, types, values, compss_kwargs,
                                persistent_storage, storage_conf)
        exit_code = result[0]
        new_types = result[1]
        new_values = result[2]
        timed_out = result[4]
        except_msg = result[5]

        if exit_code != 0:
            return exit_code, new_types, new_values, timed_out, except_msg

    else:
        # Method declared as task in class
        # Not the path of a module, it ends with a class name
        class_name = path.split('.')[-1]

        if '.' in path:
            module_name = '.'.join(path.split('.')[0:-1])
        else:
            module_name = path
        try:
            module = __import__(module_name, fromlist=[class_name])
            klass = getattr(module, class_name)
        except Exception:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            lines = traceback.format_exception(exc_type, exc_value,
                                               exc_traceback)
            logger.exception("EXCEPTION IMPORTING MODULE IN %s" % process_name)
            logger.exception(''.join(line for line in lines))
            return 1, [], [], False, None

        if __debug__:
            logger.debug("Method in class %s of module %s" %
                         (class_name, module_name))
            logger.debug("Has target: %s" % str(has_target))

        if has_target == 'true':
            # Instance method
            # The self object needs to be an object in order to call the
            # function. So, it can not be done in the @task decorator.
            # Since the args structure is parameters + self + returns we pop
            # the corresponding considering the return_length notified by the
            # runtime (-1 due to index starts from 0).
            self_index = num_params - return_length - 1
            self_elem = values.pop(self_index)
            self_type = types.pop(self_index)
            if self_type == parameter.TYPE.EXTERNAL_PSCO:
                if __debug__:
                    logger.debug("Last element (self) is a PSCO with id: %s" %
                                 str(self_elem.key))
                obj = get_by_id(self_elem.key)
            else:
                obj = None
                file_name = None
                if self_elem.key is None:
                    file_name = self_elem.file_name.split(':')[-1]
                    if __debug__:
                        logger.debug("Deserialize self from file.")
                    try:
                        obj = deserialize_from_file(file_name)
                    except Exception:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        lines = traceback.format_exception(
                            exc_type, exc_value, exc_traceback)
                        logger.exception("EXCEPTION DESERIALIZING SELF IN %s" %
                                         process_name)
                        logger.exception(''.join(line for line in lines))
                        return 1, [], [], False, None
                    if __debug__:
                        logger.debug('Deserialized self object is: %s' %
                                     self_elem.content)
                        # The message reads "object of <type> in file
                        # <file>", so the type goes first.
                        logger.debug(
                            "Processing callee, a hidden object of %s in file %s"  # noqa: E501
                            % (type(self_elem.content), file_name))
            values.insert(0, obj)

            if not self_type == parameter.TYPE.EXTERNAL_PSCO:
                types.insert(0, parameter.TYPE.OBJECT)
            else:
                types.insert(0, parameter.TYPE.EXTERNAL_PSCO)

            result = task_execution(logger, process_name, klass, method_name,
                                    time_out, types, values, compss_kwargs,
                                    persistent_storage, storage_conf)
            exit_code = result[0]
            new_types = result[1]
            new_values = result[2]
            target_direction = result[3]
            timed_out = result[4]
            except_msg = result[5]

            if exit_code != 0:
                return exit_code, new_types, new_values, timed_out, except_msg

            # Depending on the target_direction option, it is necessary to
            # serialize again self or not. Since this option is only visible
            # within the task decorator, the task_execution returns the value
            # of target_direction in order to know here if self has to be
            # serialized. This solution avoids to use inspect.
            if target_direction.direction == parameter.DIRECTION.INOUT or \
                    target_direction.direction == parameter.DIRECTION.COMMUTATIVE:  # noqa: E501
                if is_psco(obj):
                    # There is no explicit update if self is a PSCO.
                    # Consequently, the changes on the PSCO must have been
                    # pushed into the storage automatically on each PSCO
                    # modification.
                    if __debug__:
                        logger.debug("The changes on the PSCO must have been" +
                                     " automatically updated by the storage.")
                    pass
                else:
                    if __debug__:
                        logger.debug("Serializing self to file: %s" %
                                     file_name)
                    try:
                        serialize_to_file(obj, file_name)
                    except Exception:
                        # Not a bare except: KeyboardInterrupt and SystemExit
                        # must not be reported as serialization failures.
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        lines = traceback.format_exception(
                            exc_type, exc_value, exc_traceback)
                        logger.exception("EXCEPTION SERIALIZING SELF IN %s" %
                                         process_name)
                        logger.exception(''.join(line for line in lines))
                        return 1, new_types, new_values, timed_out, except_msg
                    if __debug__:
                        logger.debug("Obj: %r" % obj)
        else:
            # Class method - class is not included in values (e.g. values=[7])
            # NOTE(review): the type is appended at the end although the
            # original comment says "class must be first type" - confirm.
            types.append(None)  # class must be first type

            result = task_execution(logger, process_name, klass, method_name,
                                    time_out, types, values, compss_kwargs,
                                    persistent_storage, storage_conf)
            exit_code = result[0]
            new_types = result[1]
            new_values = result[2]
            timed_out = result[4]
            except_msg = result[5]

            if exit_code != 0:
                return exit_code, new_types, new_values, timed_out, except_msg

    # EVERYTHING OK
    if __debug__:
        logger.debug("End task execution. Status: Ok")

    return exit_code, new_types, new_values, timed_out, except_msg
Exemple #12
0
def execute_task(process_name,              # type: str
                 storage_conf,              # type: str
                 params,                    # type: list
                 tracing,                   # type: bool
                 logger,                    # type: typing.Any
                 logger_cfg,                # type: str
                 log_files,                 # type: tuple
                 python_mpi=False,          # type: bool
                 collections_layouts=None,  # type: dict
                 cache_queue=None,          # type: typing.Any
                 cache_ids=None,            # type: typing.Any
                 cache_profiler=False,      # type: bool
                 ):
    # type: (...) -> typing.Tuple[int, list, list, typing.Optional[bool], str]
    """ ExecuteTask main method.

    Parses the task description received in params, loads the user code
    (module function, instance method or class method) and delegates the
    actual invocation to task_execution. For instance methods whose target
    direction is INOUT or COMMUTATIVE, the callee object is serialized back
    to its file after the execution (unless it is a PSCO).

    :param process_name: Process name.
    :param storage_conf: Storage configuration file path ('null' disables
                         the persistent storage).
    :param params: List of parameters.
    :param tracing: Tracing flag.
    :param logger: Logger to use.
    :param logger_cfg: Logger configuration file
    :param log_files: Tuple with (out filename, err filename).
                      None to avoid stdout and sdterr fd redirection.
    :param python_mpi: If it is a MPI task.
    :param collections_layouts: collections layouts for python MPI tasks
    :param cache_queue: Cache tracker communication queue
    :param cache_ids: Cache proxy dictionary (read-only)
    :param cache_profiler: Cache profiler
    :return: Tuple with exit_code, new_types, new_values, timed_out
             and except_msg. When the class import or the deserialization
             of self fails, it is (1, [], [], None, <error message>).
    """
    if __debug__:
        logger.debug("BEGIN TASK execution in %s" % process_name)

    persistent_storage = storage_conf != 'null'

    # Retrieve the parameters from the params argument. Layout:
    #   [0] path, [1] method name, [2] time out, [3] number of slaves,
    #   [4 : 4 + num_slaves] slaves, and then the task arguments.
    path = params[0]
    method_name = params[1]
    time_out = int(params[2])
    num_slaves = int(params[3])
    arg_position = 4 + num_slaves
    # The slaves start right after the num_slaves field (index 3) and end
    # where the task arguments begin.
    slaves = list(params[4:arg_position])

    args = params[arg_position:]
    cus = args[0]  # noqa
    args = args[1:]
    has_target = args[0]
    # Next parameter: return_type = args[1]
    return_length = int(args[2])
    num_params = int(args[3])

    args = args[4:]

    # COMPSs keywords for tasks (ie: tracing, process name...)
    # compss_key is included to be checked in the @task decorator, so that
    # the task knows if it has been called from the worker or from the
    # user code (reason: ignore @task decorator if called from another task
    # or decide if submit to runtime if nesting is enabled).
    compss_kwargs = {
        'compss_key': True,
        'compss_tracing': tracing,
        'compss_process_name': process_name,
        'compss_storage_conf': storage_conf,
        'compss_return_length': return_length,
        'compss_logger': logger,
        'compss_log_cfg': logger_cfg,
        'compss_log_files': log_files,
        'compss_python_MPI': python_mpi,
        'compss_collections_layouts': collections_layouts,
        'cache_queue': cache_queue,
        'cache_ids': cache_ids,
        'cache_profiler': cache_profiler,
    }

    if __debug__:
        logger.debug("COMPSs parameters:")
        logger.debug("\t- Storage conf: %s" % str(storage_conf))
        logger.debug("\t- Logger cfg: %s" % str(logger_cfg))
        if log_files:
            logger.debug("\t- Log out file: %s" % str(log_files[0]))
            logger.debug("\t- Log err file: %s" % str(log_files[1]))
        else:
            logger.debug("\t- Log out and err not redirected")
        logger.debug("\t- Params: %s" % str(params))
        logger.debug("\t- Path: %s" % str(path))
        logger.debug("\t- Method name: %s" % str(method_name))
        logger.debug("\t- Num slaves: %s" % str(num_slaves))
        logger.debug("\t- Slaves: %s" % str(slaves))
        logger.debug("\t- Cus: %s" % str(cus))
        logger.debug("\t- Has target: %s" % str(has_target))
        logger.debug("\t- Num Params: %s" % str(num_params))
        logger.debug("\t- Return Length: %s" % str(return_length))
        logger.debug("\t- Args: %r" % args)
        logger.debug("\t- COMPSs kwargs:")
        for k, v in compss_kwargs.items():
            logger.debug("\t\t- %s: %s" % (str(k), str(v)))

    # Get all parameter values
    if __debug__:
        logger.debug("Processing parameters:")
        # logger.debug(args)
    values = get_task_params(num_params, logger, args)
    types = [x.content_type for x in values]

    if __debug__:
        logger.debug("RUN TASK with arguments:")
        logger.debug("\t- Path: %s" % path)
        logger.debug("\t- Method/function name: %s" % method_name)
        logger.debug("\t- Has target: %s" % str(has_target))
        logger.debug("\t- # parameters: %s" % str(num_params))
        # Next parameters are the values:
        # logger.debug("\t- Values:")
        # for v in values:
        #     logger.debug("\t\t %r" % v)
        # logger.debug("\t- COMPSs types:")
        # for t in types:
        #     logger.debug("\t\t %s" % str(t))

    import_error = False
    if __debug__:
        logger.debug("LOAD TASK:")
    try:
        # Try to import the module (for functions)
        if __debug__:
            logger.debug("\t- Trying to import the user module: %s" % path)
        module = import_user_module(path, logger)
    except ImportError:
        # The path is not importable as a module: it is assumed to end with
        # a class name (method declared as task within a class).
        if __debug__:
            msg = "\t- Could not import the module. Reason: Method in class."
            logger.debug(msg)
        import_error = True

    if __debug__:
        logger.debug("EXECUTE TASK:")
    if not import_error:
        # Module method declared as task
        result = task_execution(logger,
                                process_name,
                                module,
                                method_name,
                                time_out,
                                types,
                                values,
                                compss_kwargs,
                                persistent_storage,
                                storage_conf)
        exit_code = result[0]
        new_types = result[1]
        new_values = result[2]
        # Next result: target_direction = result[3]
        timed_out = result[4]
        except_msg = result[5]
    else:
        # Method declared as task in class
        # Not the path of a module, it ends with a class name
        class_name = path.split('.')[-1]

        if '.' in path:
            module_name = '.'.join(path.split('.')[0:-1])
        else:
            module_name = path
        try:
            module = __import__(module_name, fromlist=[class_name])
            klass = getattr(module, class_name)
        except Exception:  # noqa
            exc_type, exc_value, exc_traceback = sys.exc_info()
            lines = traceback.format_exception(exc_type,
                                               exc_value,
                                               exc_traceback)
            exception_message = "EXCEPTION IMPORTING MODULE IN %s\n" % process_name
            exception_message += ''.join(lines)
            logger.exception(exception_message)
            return 1, [], [], None, exception_message

        if __debug__:
            logger.debug("Method in class %s of module %s" % (class_name,
                                                              module_name))
            logger.debug("Has target: %s" % str(has_target))

        if has_target == 'true':
            # Instance method
            # The self object needs to be an object in order to call the
            # function. So, it can not be done in the @task decorator.
            # Since the args structure is parameters + self + returns we pop
            # the corresponding considering the return_length notified by the
            # runtime (-1 due to index starts from 0).
            self_index = num_params - return_length - 1
            self_elem = values.pop(self_index)
            self_type = types.pop(self_index)
            if self_type == parameter.TYPE.EXTERNAL_PSCO:
                if __debug__:
                    logger.debug("Last element (self) is a PSCO with id: %s" %
                                 str(self_elem.content))
                obj = get_by_id(self_elem.content)
            else:
                obj = None
                file_name = "None"
                if self_elem.content == "":
                    file_name = self_elem.file_name.original_path
                    if __debug__:
                        logger.debug("\t- Deserialize self from file.")
                    try:
                        obj = deserialize_from_file(file_name)
                    except Exception:  # noqa
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        lines = traceback.format_exception(exc_type,
                                                           exc_value,
                                                           exc_traceback)
                        exception_message = "EXCEPTION DESERIALIZING SELF IN %s\n" % process_name
                        exception_message += ''.join(lines)
                        logger.exception(exception_message)
                        return 1, [], [], None, exception_message
                    if __debug__:
                        logger.debug("Deserialized self object is: %s" %
                                     self_elem.content)
                        # Report the type of the deserialized callee and the
                        # file it was read from (in this order).
                        logger.debug("Processing callee, a hidden object of %s in file %s" %  # noqa: E501
                                     (type(obj), file_name))
            # The callee (self) must be the first value
            values.insert(0, obj)  # noqa

            if not self_type == parameter.TYPE.EXTERNAL_PSCO:
                types.insert(0, parameter.TYPE.OBJECT)
            else:
                types.insert(0, parameter.TYPE.EXTERNAL_PSCO)

            result = task_execution(logger,
                                    process_name,
                                    klass,
                                    method_name,
                                    time_out,
                                    types,
                                    values,
                                    compss_kwargs,
                                    persistent_storage,
                                    storage_conf)
            exit_code = result[0]
            new_types = result[1]
            new_values = result[2]
            target_direction = result[3]
            timed_out = result[4]
            except_msg = result[5]

            # Depending on the target_direction option, it is necessary to
            # serialize again self or not. Since this option is only visible
            # within the task decorator, the task_execution returns the value
            # of target_direction in order to know here if self has to be
            # serialized. This solution avoids to use inspect.
            if target_direction is not None and \
                    (target_direction.direction == parameter.DIRECTION.INOUT or
                     target_direction.direction == parameter.DIRECTION.COMMUTATIVE):  # noqa: E501
                if is_psco(obj):
                    # There is no explicit update if self is a PSCO.
                    # Consequently, the changes on the PSCO must have been
                    # pushed into the storage automatically on each PSCO
                    # modification.
                    if __debug__:
                        logger.debug("The changes on the PSCO must have been" +
                                     " automatically updated by the storage.")
                else:
                    if __debug__:
                        logger.debug("Serializing self (%r) to file: %s" %
                                     (obj, file_name))
                    try:
                        serialize_to_file(obj, file_name)
                    except Exception:  # noqa
                        # Catch any serialization exception and report the
                        # failure through the exit code.
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        lines = traceback.format_exception(exc_type,
                                                           exc_value,
                                                           exc_traceback)
                        logger.exception("EXCEPTION SERIALIZING SELF IN %s" % process_name)  # noqa: E501
                        logger.exception(''.join(lines))
                        exit_code = 1
                    else:
                        # Only claim success when no exception was raised
                        if __debug__:
                            logger.debug("Serialized successfully")
        else:
            # Class method - class is not included in values (e.g. values=[7])
            types.append(None)  # class must be first type

            result = task_execution(logger,
                                    process_name,
                                    klass,
                                    method_name,
                                    time_out,
                                    types,
                                    values,
                                    compss_kwargs,
                                    persistent_storage,
                                    storage_conf)
            exit_code = result[0]
            new_types = result[1]
            new_values = result[2]
            # Next return: target_direction = result[3]
            timed_out = result[4]
            except_msg = result[5]

    if __debug__:
        if exit_code != 0:
            logger.debug("EXECUTE TASK FAILED: Exit code: %s" % str(exit_code))
        else:
            logger.debug("END TASK execution. Status: Ok")

    return int(exit_code), new_types, new_values, timed_out, except_msg
Exemple #13
0
def _synchronize(obj, mode):
    # type: (object, int) -> object
    """ Synchronization function.

    Obtains the up to date value of an object that may still be a future.
    The runtime is asked to open the data backing the object (which blocks
    until it is available) and the retrieved value is returned.

    Objects that are not tracked, or that are not pending to be
    synchronized, are returned untouched.

    :param obj: Object to synchronize.
    :param mode: Direction of the object to synchronize.
    :return: The value of the object requested (None if the runtime
             reports a "null" file for it).
    """
    # TODO: Add a boolean to differentiate between files and object on the
    # COMPSs.open_file call. This change pretends to obtain better traces.
    # Must be implemented first in the Runtime, then in the bindings common
    # C API and finally add the boolean here
    app_id = 0

    # --- Persistent objects (PSCOs) ---
    if is_psco(obj):
        psco_id = get_id(obj)
        if OT_is_pending_to_synchronize(psco_id):
            # The runtime answers with storage://pscoId or
            # file://sys_path_to_file
            storage_uri = "storage://" + str(psco_id)
            opened = COMPSs.open_file(app_id, storage_uri, mode)
            # TODO: Add switch on protocol (first parameter returned currently ignored)
            _, psco_key = opened.split("://")
            retrieved = get_by_id(psco_key)
            OT_stop_tracking(obj)
            return retrieved
        return obj

    # --- Regular objects ---
    obj_id = OT_is_tracked(obj)
    # Nothing to do if the object is not tracked or already synchronized
    if obj_id is None or not OT_is_pending_to_synchronize(obj_id):
        return obj

    if __debug__:
        logger.debug("Synchronizing object %s with mode %s" % (obj_id, mode))

    file_name = OT_get_file_name(obj_id)
    compss_file = COMPSs.open_file(app_id, file_name, mode)

    # The runtime may answer with a file path or with a PSCO identifier
    if not compss_file.startswith('/'):
        new_obj = get_by_id(compss_file)
    else:
        # A "null" file name means that there is no result to retrieve:
        # the producer task may have been ignored or cancelled.
        if compss_file.rpartition('/')[2] == "null":
            print("WARNING: Could not retrieve the object " + str(file_name) +
                  " since the task that produces it may have been IGNORED or CANCELLED. Please, check the logs. Returning None.")  # noqa: E501
            return None
        new_obj = deserialize_from_file(compss_file)
        COMPSs.close_file(app_id, file_name, mode)

    if mode == 'r':
        # Read only: keep tracking the object, bound to the new value
        OT_update_mapping(obj_id, new_obj)
    else:
        # Any other mode: remove the file and forget the object
        COMPSs.delete_file(app_id, OT_get_file_name(obj_id), False)
        OT_stop_tracking(obj)

    return new_obj
Exemple #14
0
    def retrieve_content(self, argument, name_prefix,
                         python_mpi, collections_layouts,
                         depth=0):
        # type: (Parameter, str, bool, list, int) -> None
        """ Retrieve the content of a particular argument.

        :param argument: Argument.
        :param name_prefix: Name prefix.
        :param python_mpi: If the task is python MPI.
        :param collections_layouts: Layouts of collections params for python MPI tasks.
        :param depth: Collection depth (0 if not a collection).
        :return: None
        """
        if __debug__:
            logger.debug("\t - Revealing: " + str(argument.name))
        # This case is special, as a FILE can actually mean a FILE or an
        # object that is serialized in a file
        if is_vararg(argument.name):
            self.param_varargs = argument.name
            if __debug__:
                logger.debug("\t\t - It is vararg")

        content_type = argument.content_type
        type_file = parameter.TYPE.FILE
        type_directory = parameter.TYPE.DIRECTORY
        type_external_stream = parameter.TYPE.EXTERNAL_STREAM
        type_collection = parameter.TYPE.COLLECTION
        type_external_psco = parameter.TYPE.EXTERNAL_PSCO

        if content_type == type_file:
            if self.is_parameter_an_object(argument.name):
                # The object is stored in some file, load and deserialize
                f_name = argument.file_name.split(':')[-1]
                if __debug__:
                    logger.debug("\t\t - It is an OBJECT. Deserializing from file: " + str(f_name))  # noqa: E501
                argument.content = deserialize_from_file(f_name)
                if __debug__:
                    logger.debug("\t\t - Deserialization finished")
            else:
                # The object is a FILE, just forward the path of the file
                # as a string parameter
                argument.content = argument.file_name.split(':')[-1]
                if __debug__:
                    logger.debug("\t\t - It is FILE: " + str(argument.content))
        elif content_type == type_directory:
            if __debug__:
                logger.debug("\t\t - It is a DIRECTORY")
            argument.content = argument.file_name.split(":")[-1]
        elif content_type == type_external_stream:
            if __debug__:
                logger.debug("\t\t - It is an EXTERNAL STREAM")
            argument.content = deserialize_from_file(argument.file_name)
        elif content_type == type_collection:
            argument.content = []
            # This field is exclusive for COLLECTION_T parameters, so make
            # sure you have checked this parameter is a collection before
            # consulting it
            argument.collection_content = []
            col_f_name = argument.file_name.split(':')[-1]

            # maybe it is an inner-collection..
            _dec_arg = self.decorator_arguments.get(argument.name, None)
            _col_dir = _dec_arg.direction if _dec_arg else None
            _col_dep = _dec_arg.depth if _dec_arg else depth
            if __debug__:
                logger.debug("\t\t - It is a COLLECTION: " + str(col_f_name))
                logger.debug("\t\t\t - Depth: " + str(_col_dep))

            # Check if this collection is in layout
            # Three conditions:
            # 1- this is a mpi task
            # 2- it has a collection layout
            # 3- the current argument is the layout target
            in_mpi_collection_env = False
            if python_mpi and collections_layouts and \
                    collections_layouts[0] == argument.name:
                in_mpi_collection_env = True
                from pycompss.util.mpi.helper import rank_distributor
                # Call rank_distributor if the current param is the target of
                # the layout for each rank, return its offset(s) in the
                # collection.
                rank_distribution = rank_distributor(collections_layouts[1:])
                rank_distr_len = len(rank_distribution)
                if __debug__:
                    logger.debug("Rank distribution is: " + str(rank_distribution))  # noqa: E501

            for (i, line) in enumerate(open(col_f_name, 'r')):
                if in_mpi_collection_env:
                    # Isn't this my offset? skip
                    if i not in rank_distribution:
                        continue
                data_type, content_file, content_type = line.strip().split()
                # Same naming convention as in COMPSsRuntimeImpl.java
                sub_name = "%s.%d" % (argument.name, i)
                if name_prefix:
                    sub_name = "%s.%s" % (name_prefix, argument.name)
                else:
                    sub_name = "@%s" % sub_name

                if __debug__:
                    logger.debug("\t\t\t - Revealing element: " +
                                 str(sub_name))

                if not self.is_parameter_file_collection(argument.name):
                    sub_arg, _ = build_task_parameter(int(data_type),
                                                      parameter.IOSTREAM.UNSPECIFIED,  # noqa: E501
                                                      "",
                                                      sub_name,
                                                      content_file,
                                                      argument.content_type)

                    # if direction of the collection is 'out', it means we
                    # haven't received serialized objects from the Master
                    # (even though parameters have 'file_name', those files
                    # haven't been created yet). plus, inner collections of
                    # col_out params do NOT have 'direction', we identify
                    # them by 'depth'..
                    if _col_dir == parameter.DIRECTION.OUT or \
                            ((_col_dir is None) and _col_dep > 0):
                        # if we are at the last level of COL_OUT param,
                        # create 'empty' instances of elements
                        if _col_dep == 1:
                            temp = create_object_by_con_type(content_type)
                            sub_arg.content = temp
                            # In case that only one element is used in this
                            # mpi rank, the collection list is removed
                            if in_mpi_collection_env and rank_distr_len == 1:
                                argument.content = sub_arg.content
                                argument.content_type = sub_arg.content_type
                            else:
                                argument.content.append(sub_arg.content)
                            argument.collection_content.append(sub_arg)
                        else:
                            self.retrieve_content(sub_arg,
                                                  sub_name,
                                                  python_mpi,
                                                  collections_layouts,
                                                  depth=_col_dep - 1)
                            # In case that only one element is used in this mpi
                            # rank, the collection list is removed
                            if in_mpi_collection_env and rank_distr_len == 1:
                                argument.content = sub_arg.content
                                argument.content_type = sub_arg.content_type
                            else:
                                argument.content.append(sub_arg.content)
                            argument.collection_content.append(sub_arg)
                    else:
                        # Recursively call the retrieve method, fill the
                        # content field in our new taskParameter object
                        self.retrieve_content(sub_arg, sub_name,
                                              python_mpi, collections_layouts)
                        # In case only one element is used in this mpi rank,
                        # the collection list is removed
                        if in_mpi_collection_env and rank_distr_len == 1:
                            argument.content = sub_arg.content
                            argument.content_type = sub_arg.content_type
                        else:
                            argument.content.append(sub_arg.content)
                        argument.collection_content.append(sub_arg)
                else:
                    # In case only one element is used in this mpi rank,
                    # the collection list is removed
                    if in_mpi_collection_env and rank_distr_len == 1:
                        argument.content = content_file
                        argument.content_type = parameter.TYPE.FILE
                    else:
                        argument.content.append(content_file)
                    argument.collection_content.append(content_file)

        elif not self.storage_supports_pipelining() and \
                content_type == type_external_psco:
            if __debug__:
                logger.debug("\t\t - It is a PSCO")
            # The object is a PSCO and the storage does not support
            # pipelining, do a single getByID of the PSCO
            from storage.api import getByID  # noqa
            argument.content = getByID(argument.content)