예제 #1
0
파일: tracker.py 프로젝트: bsc-wdc/compss
def retrieve_object_from_cache(logger, cache_ids, cache_queue, identifier,
                               parameter_name, user_function,
                               cache_profiler):  # noqa
    # type: (typing.Any, typing.Any, Queue, str, str, typing.Callable, bool) -> typing.Any
    """ Retrieve an object from the given cache proxy dict.

    The entry is looked up with the file name derived from the identifier
    and the object is rebuilt from the shared memory block named in the
    entry. The hit counter of the entry is increased by one.

    :param logger: Logger where to push messages.
    :param cache_ids: Cache proxy dictionary. Each entry is expected to be a
                      mutable sequence of six fields (id, shape, dtype,
                      <unused here>, hits, shared type tag).
    :param cache_queue: Cache notification queue.
    :param identifier: Object identifier.
    :param parameter_name: Parameter name.
    :param user_function: Function name.
    :param cache_profiler: If cache profiling is enabled.
    :return: A tuple with the object from cache and the shared memory
             handle (SharedMemory or ShareableList) it was obtained from.
    :raises PyCOMPSsException: If the entry has an unknown shared type tag.
    """
    with event_inside_worker(RETRIEVE_OBJECT_FROM_CACHE_EVENT):
        # The size event is emitted with 0 here and with the real buffer
        # size once the object has been retrieved.
        emit_manual_event_explicit(BINDING_DESERIALIZATION_CACHE_SIZE_TYPE, 0)
        identifier = __get_file_name__(identifier)
        if __debug__:
            logger.debug(HEADER + "Retrieving: " + str(identifier))
        obj_id, obj_shape, obj_d_type, _, obj_hits, shared_type = cache_ids[
            identifier]  # noqa: E501
        output = None  # type: typing.Any
        existing_shm = None  # type: typing.Any
        object_size = 0
        if shared_type == SHARED_MEMORY_TAG:
            existing_shm = SharedMemory(name=obj_id)
            # The array is built directly on top of the shared buffer, so
            # the handle is returned together with it.
            output = np.ndarray(obj_shape,
                                dtype=obj_d_type,
                                buffer=existing_shm.buf)  # noqa: E501
            object_size = len(existing_shm.buf)
        elif shared_type == SHAREABLE_LIST_TAG:
            existing_shm = ShareableList(name=obj_id)
            output = list(existing_shm)
            object_size = len(existing_shm.shm.buf)
        elif shared_type == SHAREABLE_TUPLE_TAG:
            existing_shm = ShareableList(name=obj_id)
            output = tuple(existing_shm)
            object_size = len(existing_shm.shm.buf)
        # Currently unsupported since conversion requires lists of lists.
        # elif shared_type == SHAREABLE_DICT_TAG:
        #     existing_shm = ShareableList(name=obj_id)
        #     output = dict(existing_shm)
        else:
            raise PyCOMPSsException("Unknown cacheable type.")
        if __debug__:
            logger.debug(HEADER + "Retrieved: " + str(identifier))
        emit_manual_event_explicit(BINDING_DESERIALIZATION_CACHE_SIZE_TYPE,
                                   object_size)

        # Profiling: the cleaned names are only needed for the notification.
        if cache_profiler:
            filename = filename_cleaned(identifier)
            function_name = function_cleaned(user_function)
            cache_queue.put(("GET", (filename, parameter_name, function_name)))

        # Add hit.
        # When cache_ids is a manager proxy dictionary, a lookup returns a
        # copy of the stored entry, so changing that copy in place would be
        # lost. Re-assigning the entry pushes the new hit count back (and
        # behaves the same with a plain dict).
        entry = cache_ids[identifier]
        entry[4] = obj_hits + 1
        cache_ids[identifier] = entry
        return output, existing_shm
예제 #2
0
파일: tracker.py 프로젝트: giulange/compss
def retrieve_object_from_cache(logger, cache_ids, identifier):  # noqa
    # type: (..., ..., str) -> ...
    """ Retrieve an object from the given cache proxy dict.

    The entry is looked up with the file name derived from the identifier
    and the object is rebuilt from the shared memory block named in the
    entry. The hit counter of the entry is increased by one.

    :param logger: Logger where to push messages.
    :param cache_ids: Cache proxy dictionary. Each entry is expected to be a
                      mutable sequence of six fields (id, shape, dtype,
                      <unused here>, hits, shared type tag).
    :param identifier: Object identifier.
    :return: A tuple with the object from cache and the shared memory
             handle (SharedMemory or ShareableList) it was obtained from.
    :raises PyCOMPSsException: If the entry has an unknown shared type tag.
    """
    # The size event is emitted with 0 here and with the real buffer size
    # once the object has been retrieved.
    emit_manual_event_explicit(TASK_EVENTS_DESERIALIZE_SIZE_CACHE, 0)
    identifier = __get_file_name__(identifier)
    if __debug__:
        logger.debug(HEADER + "Retrieving: " + str(identifier))
    obj_id, obj_shape, obj_d_type, _, obj_hits, shared_type = cache_ids[
        identifier]  # noqa: E501
    size = 0
    if shared_type == SHARED_MEMORY_TAG:
        existing_shm = SharedMemory(name=obj_id)
        size = len(existing_shm.buf)
        # The array is built directly on top of the shared buffer, so the
        # handle is returned together with it.
        output = np.ndarray(obj_shape,
                            dtype=obj_d_type,
                            buffer=existing_shm.buf)  # noqa: E501
    elif shared_type == SHAREABLE_LIST_TAG:
        existing_shm = ShareableList(name=obj_id)
        size = len(existing_shm.shm.buf)
        output = list(existing_shm)
    elif shared_type == SHAREABLE_TUPLE_TAG:
        existing_shm = ShareableList(name=obj_id)
        size = len(existing_shm.shm.buf)
        output = tuple(existing_shm)
    # Currently unsupported since conversion requires lists of lists.
    # elif shared_type == SHAREABLE_DICT_TAG:
    #     existing_shm = ShareableList(name=obj_id)
    #     output = dict(existing_shm)
    else:
        raise PyCOMPSsException("Unknown cacheable type.")
    if __debug__:
        logger.debug(HEADER + "Retrieved: " + str(identifier))
    emit_manual_event_explicit(TASK_EVENTS_DESERIALIZE_SIZE_CACHE, size)
    # Add hit.
    # When cache_ids is a manager proxy dictionary, a lookup returns a copy
    # of the stored entry, so changing that copy in place would be lost.
    # Re-assigning the entry pushes the new hit count back (and behaves the
    # same with a plain dict).
    entry = cache_ids[identifier]
    entry[4] = obj_hits + 1
    cache_ids[identifier] = entry
    return output, existing_shm
예제 #3
0
파일: tracker.py 프로젝트: bsc-wdc/compss
def insert_object_into_cache(logger, cache_queue, obj, f_name, parameter,
                             user_function):  # noqa
    # type: (typing.Any, Queue, typing.Any, str, str, typing.Callable) -> None
    """ Put an object into cache.

    Only np.ndarray, list and tuple objects are stored. The object is
    copied into shared memory obtained from SHARED_MEMORY_MANAGER and a
    "PUT" message describing it is pushed to the cache queue. Any other
    type is skipped (a debug message is logged).

    :param logger: Logger where to push messages.
    :param cache_queue: Cache notification queue.
    :param obj: Object to store.
    :param f_name: File name that corresponds to the object (used as id).
    :param parameter: Parameter name.
    :param user_function: Function.
    :return: None
    """
    with event_inside_worker(INSERT_OBJECT_INTO_CACHE_EVENT):
        function = function_cleaned(user_function)
        f_name = __get_file_name__(f_name)
        if __debug__:
            logger.debug(HEADER + "Inserting into cache (%s): %s" %
                         (str(type(obj)), str(f_name)))
        try:
            if isinstance(obj, np.ndarray):
                emit_manual_event_explicit(
                    BINDING_SERIALIZATION_CACHE_SIZE_TYPE, 0)
                shape = obj.shape
                d_type = obj.dtype
                size = obj.nbytes
                shm = SHARED_MEMORY_MANAGER.SharedMemory(size=size)  # noqa
                within_cache = np.ndarray(shape, dtype=d_type, buffer=shm.buf)
                # Copy contents. The ellipsis index also works for
                # 0-dimensional arrays, where "[:]" raises IndexError.
                within_cache[...] = obj
                new_cache_id = shm.name
                cache_queue.put(("PUT", (f_name, new_cache_id, shape, d_type,
                                         size, SHARED_MEMORY_TAG, parameter,
                                         function)))  # noqa: E501
            elif isinstance(obj, (list, tuple)):
                # Lists and tuples are stored the same way; only the tag
                # differs so that the original type can be rebuilt later.
                if isinstance(obj, list):
                    shared_tag = SHAREABLE_LIST_TAG
                else:
                    shared_tag = SHAREABLE_TUPLE_TAG
                emit_manual_event_explicit(
                    BINDING_SERIALIZATION_CACHE_SIZE_TYPE, 0)
                sl = SHARED_MEMORY_MANAGER.ShareableList(obj)  # noqa
                new_cache_id = sl.shm.name
                size = total_sizeof(obj)
                cache_queue.put(("PUT", (f_name, new_cache_id, 0, 0, size,
                                         shared_tag, parameter,
                                         function)))  # noqa: E501
            # Unsupported dicts since they are lists of lists when converted.
            else:
                if __debug__:
                    logger.debug(
                        HEADER +
                        "Can not put into cache: Not a [np.ndarray | list | tuple ] object"
                    )  # noqa: E501
                # Nothing was stored: no size event nor "Inserted" message.
                return
            emit_manual_event_explicit(
                BINDING_SERIALIZATION_CACHE_SIZE_TYPE, size)
            if __debug__:
                logger.debug(HEADER + "Inserted into cache: " + str(f_name) +
                             " as " + str(new_cache_id))  # noqa: E501
        except KeyError as e:  # noqa
            # Best effort: a KeyError while storing is logged and the object
            # is simply not cached.
            if __debug__:
                logger.debug(
                    HEADER +
                    "Can not put into cache. It may be a [np.ndarray | list | tuple ] object containing an unsupported type"
                )  # noqa: E501
                logger.debug(str(e))
예제 #4
0
def deserialize_from_handler(handler, show_exception=True):
    # type: (..., bool) -> object
    """ Deserialize an object from a file.

    The first 4 bytes read from the handler are parsed as an integer that
    selects the serializer library (through IDX2LIB) used to load the rest
    of the content.

    :param handler: File handler from where the object is going to be
                    deserialized.
    :param show_exception: Show exception if happen (only with debug).
    :return: A tuple with the object and a boolean telling if the handler
             has to be closed. The flag is False only when a pyarrow
             RecordBatchFileReader is returned.
    :raises SerializerException: If deserialization can not be done.
    """
    # Retrieve the used library (if possible)
    emit_manual_event_explicit(DESERIALIZATION_SIZE_EVENTS, 0)
    if hasattr(handler, 'name'):
        emit_manual_event_explicit(
            DESERIALIZATION_OBJECT_NUM,
            (abs(hash(os.path.basename(handler.name))) % platform_c_maxint))
    # Taken outside the try so that the position is always known when the
    # handler has to be rewound.
    original_position = handler.tell()
    try:
        serializer = IDX2LIB[int(handler.read(4))]
    except (KeyError, ValueError):
        # KeyError: the first 4 bytes return a value that is not within
        # IDX2LIB. ValueError: they are not a number at all (e.g. empty or
        # foreign file). In both cases leave the handler where it was.
        handler.seek(original_position)
        error_message = 'Handler does not refer to a valid PyCOMPSs object'
        raise SerializerException(error_message)

    close_handler = True
    try:
        if DISABLE_GC:
            # Disable the garbage collector while serializing -> performance?
            gc.disable()
        if serializer is numpy and NUMPY_AVAILABLE:
            ret = serializer.load(handler, allow_pickle=False)
        elif serializer is pyarrow and PYARROW_AVAILABLE:
            ret = pyarrow.ipc.open_file(handler)
            if isinstance(ret, pyarrow.ipc.RecordBatchFileReader):
                # The caller is told not to close the handler
                # (presumably because the reader still reads from it).
                close_handler = False
        else:
            ret = serializer.load(handler)
        # Special case: deserialized obj wraps a generator
        if isinstance(ret, tuple) and \
                ret and \
                isinstance(ret[0], GeneratorIndicator):
            ret = convert_to_generator(ret[1])
        if DISABLE_GC:
            # Enable the garbage collector and force to clean the memory
            gc.enable()
            gc.collect()
        emit_manual_event_explicit(DESERIALIZATION_SIZE_EVENTS, handler.tell())
        emit_manual_event_explicit(DESERIALIZATION_OBJECT_NUM, 0)
        return ret, close_handler
    except Exception:
        if DISABLE_GC:
            # Do not leave the garbage collector disabled on failure.
            gc.enable()
        if __debug__ and show_exception:
            print('ERROR! Deserialization with %s failed.' % str(serializer))
            try:
                traceback.print_exc()
            except AttributeError:
                # Bug fixed in 3.5 - issue10805
                pass
        raise SerializerException('Cannot deserialize object')
예제 #5
0
def serialize_to_handler(obj, handler):
    # type: (object, ...) -> None
    """ Serialize an object to a handler.

    The serializers returned by get_serializer_priority are tried in order
    until one of them succeeds. Before each attempt the handler is rewound
    to its initial position and a 4 digit header identifying the serializer
    (LIB2IDX) is written.

    :param obj: Object to be serialized.
    :param handler: A handler object. It must implement methods like write,
                    writeline and similar stuff.
    :return: none
    :raises SerializerException: If something wrong happens during
                                 serialization.
    """
    import sys  # local import: only needed to report the failure traceback

    emit_manual_event_explicit(SERIALIZATION_SIZE_EVENTS, 0)
    if hasattr(handler, 'name'):
        emit_manual_event_explicit(
            SERIALIZATION_OBJECT_NUM,
            (abs(hash(os.path.basename(handler.name))) % platform_c_maxint))
    if DISABLE_GC:
        # Disable the garbage collector while serializing -> more performance?
        gc.disable()
    success = False
    # Formatted traceback of the last failed attempt that has not been
    # printed yet (None if there is nothing pending).
    pending_traceback = None
    try:
        # Get the serializer priority
        serializer_priority = get_serializer_priority(obj)
        original_position = handler.tell()
        # Lets try the serializers in the given priority
        for serializer in serializer_priority:
            # Reset the handlers pointer to the first position
            handler.seek(original_position)
            handler.write(bytearray('%04d' % LIB2IDX[serializer], 'utf8'))

            is_generator = isinstance(obj, types.GeneratorType)
            try:
                if is_generator:
                    # Special case: obj is a generator
                    pickle_generator(obj, handler, serializer)
                elif serializer is numpy and \
                        NUMPY_AVAILABLE and \
                        isinstance(obj, numpy.ndarray):
                    # If it is a numpy object then use its saving mechanism
                    # (numpy.matrix is an ndarray subclass, so it is
                    # covered by the same check).
                    serializer.save(handler, obj, allow_pickle=False)
                elif serializer is pyarrow and \
                        PYARROW_AVAILABLE and \
                        object_belongs_to_module(obj, "pyarrow"):
                    writer = pyarrow.ipc.new_file(handler, obj.schema)  # noqa
                    writer.write(obj)
                    writer.close()
                else:
                    serializer.dump(obj,
                                    handler,
                                    protocol=serializer.HIGHEST_PROTOCOL)
                success = True
            except Exception:  # noqa
                if is_generator and __debug__:
                    # Generator failures are reported right away.
                    traceback.print_exc()
                    pending_traceback = None
                else:
                    # Keep it: the traceback is only available while the
                    # exception is being handled.
                    pending_traceback = traceback.format_exc()
            if success:
                break
        emit_manual_event_explicit(SERIALIZATION_SIZE_EVENTS, handler.tell())
        emit_manual_event_explicit(SERIALIZATION_OBJECT_NUM, 0)
    finally:
        if DISABLE_GC:
            # Enable the garbage collector and force to clean the memory.
            # Done in a finally so that it is never left disabled.
            gc.enable()
            gc.collect()

    # if success is False then all the serializers have failed
    if not success:
        if pending_traceback:
            sys.stderr.write(pending_traceback)
        # Wrap obj in a tuple so that tuple objects are formatted as a
        # single value instead of being expanded as format arguments.
        raise SerializerException('Cannot serialize object %s' % (obj,))