Example #1
0
def _init_devices():
    """Build the Python-side table of physical offload targets.

    Asks ``number_of_devices()`` how many devices exist and creates one
    ``OffloadDevice`` per device index.

    Returns
    -------
    dict
        Maps every index in ``range(number_of_devices())`` to the
        ``OffloadDevice`` constructed for that index.
    """
    device_count = number_of_devices()
    debug(5, "found {0} device(s)", device_count)
    return {index: OffloadDevice(index) for index in range(device_count)}
Example #2
0
def _init_devices():
    """Build the Python-side table of physical offload targets.

    Asks ``number_of_devices()`` how many devices exist and creates one
    ``OffloadDevice`` per device index.

    Returns
    -------
    dict
        Maps every index in ``range(number_of_devices())`` to the
        ``OffloadDevice`` constructed for that index.
    """
    device_count = number_of_devices()
    debug(5, "found {0} device(s)", device_count)
    return {index: OffloadDevice(index) for index in range(device_count)}
Example #3
0
    def __init__(self, device=None):
        """Create a new stream for ``device``.

           Parameters
           ----------
           device : object
              Device the stream belongs to; must not be None and must
              expose a ``device_id`` attribute.
        """
        # a stream cannot exist without its device
        assert device is not None
        self._device = device
        self._device_id = device.device_id

        # ask the native layer for a stream and remember its id
        stream_id = _pymic_impl_stream_create(self._device_id, 'stream')
        self._stream_id = stream_id
        message = 'created stream 0x{0:x} for device {1}'.format(
            stream_id, self._device_id)
        debug(1, message)
Example #4
0
    def __init__(self, device=None):
        """Create a new stream for ``device``.

           Parameters
           ----------
           device : object
              Device the stream belongs to; must not be None and must
              expose a ``device_id`` attribute.
        """
        # a stream cannot exist without its device
        assert device is not None
        self._device = device
        self._device_id = device.device_id

        # ask the native layer for a stream and remember its id
        stream_id = _pymic_impl_stream_create(self._device_id, 'stream')
        self._stream_id = stream_id
        message = 'created stream 0x{0:x} for device {1}'.format(
            stream_id, self._device_id)
        debug(1, message)
Example #5
0
    def _trace_func(func):
        """Decorate ``func`` so that every call is recorded in the trace
           database.

           The returned wrapper measures the call with
           ``timeit.default_timer()``, collects stack information through
           ``_stack_walk_func()`` and registers name, timestamps, arguments
           and stack information with ``_trace_database``.  The result of
           ``func`` is returned unchanged.  A call that raises is not
           registered because the exception propagates before the
           registration.

           Parameters
           ----------
           func : callable
              Function to be traced; its ``__name__`` is used as the key
              of the trace record.

           Returns
           -------
           callable
              Wrapper that carries the name and docstring of ``func``.
        """
        # function-scope import so the decorator does not depend on the
        # module's import block
        import functools

        funcname = func.__name__
        debug(5, "collecting statistics for {0}", funcname)

        # functools.wraps keeps __name__/__doc__ of the traced function, so
        # help() and introspection still show the real API instead of
        # "wrapper"
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            tstart = timeit.default_timer()
            rv = func(*args, **kwargs)
            tend = timeit.default_timer()
            stack_info = _stack_walk_func()
            _trace_database.register(funcname, tstart, tend,
                                     args, kwargs, stack_info)
            return rv
        return wrapper
Example #6
0
    def _trace_func(func):
        """Decorate ``func`` so that every call is recorded in the trace
           database.

           The returned wrapper measures the call with
           ``timeit.default_timer()``, collects stack information through
           ``_stack_walk_func()`` and registers name, timestamps, arguments
           and stack information with ``_trace_database``.  The result of
           ``func`` is returned unchanged.  A call that raises is not
           registered because the exception propagates before the
           registration.

           Parameters
           ----------
           func : callable
              Function to be traced; its ``__name__`` is used as the key
              of the trace record.

           Returns
           -------
           callable
              Wrapper that carries the name and docstring of ``func``.
        """
        # function-scope import so the decorator does not depend on the
        # module's import block
        import functools

        funcname = func.__name__
        debug(5, "collecting statistics for {0}", funcname)

        # functools.wraps keeps __name__/__doc__ of the traced function, so
        # help() and introspection still show the real API instead of
        # "wrapper"
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            tstart = timeit.default_timer()
            rv = func(*args, **kwargs)
            tend = timeit.default_timer()
            stack_info = _stack_walk_func()
            _trace_database.register(funcname, tstart, tend, args, kwargs,
                                     stack_info)
            return rv

        return wrapper
Example #7
0
    def __init__(self, library, device=None):
        """Set up this OffloadLibrary: locate ``library`` on the host and
           load it on ``device``.  Not meant to be called from outside
           pymic.

           Parameters
           ----------
           library : str
              Name or path of the library, resolved by ``_find_library``.
           device : object
              Target device; must not be None.

           Raises
           ------
           OffloadError
              If ``_find_library`` cannot resolve ``library``.
        """

        # a library is always bound to a device
        assert device is not None

        # remember what was requested and where it will be loaded
        self._library = library
        self._device = device
        self._device_id = device._map_dev_id()
        # keep a reference to the native unload routine on the instance
        self.unloader = _pymic_impl_unload_library
        self._cache = {}

        # resolve the library to a file on the host file system
        debug(5, "searching for {0} in {1}", library, config._search_path)
        library_file = OffloadLibrary._find_library(library)
        if library_file is None:
            debug(5, "no suitable library found for '{0}'", library)
            message = ("Cannot find library '{0}' "
                       "in PYMIC_LIBRARY_PATH").format(library)
            raise OffloadError(message)

        # hand the file to the native layer and keep what it returns
        device_id = self._device_id
        debug(5, "loading '{0}' on device {1}", library_file, device_id)
        loaded = _pymic_impl_load_library(device_id, library_file)
        self._tempfile, self._handle = loaded
        debug(5, "successfully loaded '{0}' on device {1} with handle 0x{2:x}",
              library_file, device_id, self._handle)
Example #8
0
    def __init__(self, library, device=None):
        """Set up this OffloadLibrary: locate ``library`` on the host and
           load it on ``device``.  Not meant to be called from outside
           pymic.

           Parameters
           ----------
           library : str
              Name or path of the library, resolved by ``_find_library``.
           device : object
              Target device; must not be None.

           Raises
           ------
           OffloadError
              If ``_find_library`` cannot resolve ``library``.
        """

        # a library is always bound to a device
        assert device is not None

        # remember what was requested and where it will be loaded
        self._library = library
        self._device = device
        self._device_id = device._map_dev_id()
        # keep a reference to the native unload routine on the instance
        self.unloader = _pymic_impl_unload_library
        self._cache = {}

        # resolve the library to a file on the host file system
        debug(5, "searching for {0} in {1}", library, config._search_path)
        library_file = OffloadLibrary._find_library(library)
        if library_file is None:
            debug(5, "no suitable library found for '{0}'", library)
            message = ("Cannot find library '{0}' "
                       "in PYMIC_LIBRARY_PATH").format(library)
            raise OffloadError(message)

        # hand the file to the native layer and keep what it returns
        device_id = self._device_id
        debug(5, "loading '{0}' on device {1}", library_file, device_id)
        loaded = _pymic_impl_load_library(device_id, library_file)
        self._tempfile, self._handle = loaded
        debug(5, "successfully loaded '{0}' on device {1} with handle 0x{2:x}",
              library_file, device_id, self._handle)
    def _find_library(library):
        """Resolve ``library`` to the path of a usable library file.

           An absolute path that passes ``OffloadLibrary._check_k1om`` is
           returned as is.  Otherwise every directory listed in
           ``config._search_path`` (separated by ``os.pathsep``) is tried in
           order and the first existing file that passes the check wins.

           Parameters
           ----------
           library : str
              Absolute path or file name of the library.

           Returns
           -------
           str or None
              Path of the first match, or None if nothing suitable exists.
        """
        # NOTE(review): _check_k1om presumably validates the binary format
        # of the file -- confirm against its definition.
        if os.path.isabs(library) and OffloadLibrary._check_k1om(library):
            return library

        for directory in config._search_path.split(os.pathsep):
            debug(5, "    looking for {0} in {1}", library, directory)
            candidate = os.path.join(directory, library)
            if not os.path.isfile(candidate):
                continue
            if OffloadLibrary._check_k1om(candidate):
                return candidate

        # search path exhausted without a match
        return None
Example #10
0
    def allocate_device_memory(self, nbytes, alignment=64, sticky=False):
        """Allocate memory on the device this stream belongs to.  Though it
           is part of the stream interface, the operation is synchronous.

           Caution: this is a low-level function, do not use it unless you
                    have a very specific reason to do so.  Better use the
                    high-level interfaces of OffloadArray instead.

           Parameters
           ----------
           nbytes : int
              Number of bytes to allocate; must be greater than zero.
           alignment : int
              Alignment of the data on the target device.
           sticky : bool
              Passed through unchanged to the SmartPtr that wraps the
              allocation.

           See Also
           --------
           deallocate_device_memory, transfer_host2device,
           transfer_device2host, transfer_device2device

           Returns
           -------
           out : SmartPtr
              Wrapper around the fake pointer that identifies the
              allocated memory

           Raises
           ------
           ValueError
              If nbytes is zero or negative.

           Examples
           --------
           >>> ptr = stream.allocate_device_memory(4096)
           >>> print ptr
           140297169571840
        """

        device_id = self._device_id

        # refuse empty or negative requests before touching the device
        if nbytes <= 0:
            message = ('Cannot allocate negative amount of '
                       'memory: {0}').format(nbytes)
            raise ValueError(message)

        raw_ptr = _pymic_impl_stream_allocate(device_id, self._stream_id,
                                              nbytes, alignment)
        debug(2, 'allocated {0} bytes on device {1} at 0x{2:x}'
                 ', alignment {3}',
              nbytes, device_id, raw_ptr, alignment)

        # wrap the raw fake pointer so it carries its stream and device
        return SmartPtr(self, device_id, raw_ptr, sticky)
Example #11
0
    def allocate_device_memory(self, nbytes, alignment=64, sticky=False):
        """Allocate memory on the device this stream belongs to.  Though it
           is part of the stream interface, the operation is synchronous.

           Caution: this is a low-level function, do not use it unless you
                    have a very specific reason to do so.  Better use the
                    high-level interfaces of OffloadArray instead.

           Parameters
           ----------
           nbytes : int
              Number of bytes to allocate; must be greater than zero.
           alignment : int
              Alignment of the data on the target device.
           sticky : bool
              Passed through unchanged to the SmartPtr that wraps the
              allocation.

           See Also
           --------
           deallocate_device_memory, transfer_host2device,
           transfer_device2host, transfer_device2device

           Returns
           -------
           out : SmartPtr
              Wrapper around the fake pointer that identifies the
              allocated memory

           Raises
           ------
           ValueError
              If nbytes is zero or negative.

           Examples
           --------
           >>> ptr = stream.allocate_device_memory(4096)
           >>> print ptr
           140297169571840
        """

        device_id = self._device_id

        # refuse empty or negative requests before touching the device
        if nbytes <= 0:
            message = ('Cannot allocate negative amount of '
                       'memory: {0}').format(nbytes)
            raise ValueError(message)

        raw_ptr = _pymic_impl_stream_allocate(device_id, self._stream_id,
                                              nbytes, alignment)
        debug(2, 'allocated {0} bytes on device {1} at 0x{2:x}'
                 ', alignment {3}',
              nbytes, device_id, raw_ptr, alignment)

        # wrap the raw fake pointer so it carries its stream and device
        return SmartPtr(self, device_id, raw_ptr, sticky)
Example #12
0
    def deallocate_device_memory(self, device_ptr):
        """Release device memory that was obtained from
           allocate_device_memory.  Though it is part of the stream
           interface, the operation is synchronous.

           Caution: this is a low-level function, do not use it unless you
                    have a very specific reason to do so.  Better use the
                    high-level interfaces of OffloadArray instead.

           Parameters
           ----------
           device_ptr : SmartPtr
              Fake pointer of the memory to deallocate

           See Also
           --------
           allocate_device_memory, transfer_host2device,
           transfer_device2host, transfer_device2device

           Returns
           -------
           None

           Raises
           ------
           ValueError
              If device_ptr is None or not a SmartPtr.

           Examples
           --------
           >>> ptr = stream.allocate_device_memory(4096)
           >>> stream.deallocate_device_memory(ptr)
        """

        owner = self._device

        # only pointers wrapped in a SmartPtr can be released
        if device_ptr is None:
            raise ValueError('Cannot deallocate None pointer')
        if not isinstance(device_ptr, SmartPtr):
            raise ValueError('Wrong argument, no device pointer given')
        # TODO: add more safety checks here (e.g., pointer from right device
        #       and stream)

        raw_ptr = device_ptr._device_ptr
        _pymic_impl_stream_deallocate(self._device_id, self._stream_id,
                                      raw_ptr)
        debug(2, 'deallocated pointer {0} on device {1}', device_ptr, owner)
Example #13
0
    def deallocate_device_memory(self, device_ptr):
        """Release device memory that was obtained from
           allocate_device_memory.  Though it is part of the stream
           interface, the operation is synchronous.

           Caution: this is a low-level function, do not use it unless you
                    have a very specific reason to do so.  Better use the
                    high-level interfaces of OffloadArray instead.

           Parameters
           ----------
           device_ptr : SmartPtr
              Fake pointer of the memory to deallocate

           See Also
           --------
           allocate_device_memory, transfer_host2device,
           transfer_device2host, transfer_device2device

           Returns
           -------
           None

           Raises
           ------
           ValueError
              If device_ptr is None or not a SmartPtr.

           Examples
           --------
           >>> ptr = stream.allocate_device_memory(4096)
           >>> stream.deallocate_device_memory(ptr)
        """

        owner = self._device

        # only pointers wrapped in a SmartPtr can be released
        if device_ptr is None:
            raise ValueError('Cannot deallocate None pointer')
        if not isinstance(device_ptr, SmartPtr):
            raise ValueError('Wrong argument, no device pointer given')
        # TODO: add more safety checks here (e.g., pointer from right device
        #       and stream)

        raw_ptr = device_ptr._device_ptr
        _pymic_impl_stream_deallocate(self._device_id, self._stream_id,
                                      raw_ptr)
        debug(2, 'deallocated pointer {0} on device {1}', device_ptr, owner)
Example #14
0
    def invoke(self, kernel, *args):
        """Invoke a native kernel on the target device by enqueuing a request
           in the current stream.  The kernel is obtained by accessing the
           attribute of a loaded library that has the same name as the
           kernel function; the library must have been loaded with
           load_library on the device of this stream before invoke.

           Each additional argument is handled according to its type:

           * OffloadArray: the device pointer of the array is passed.
           * numpy.ndarray: a temporary device buffer is allocated, the
             data is transferred to the device before the kernel runs and
             transferred back afterwards; the buffer is then released.
           * anything else: the value is wrapped with numpy.asarray and
             passed by its host address.

           All operations (copy in/copy out and invocation) are enqueued into
           the stream object and complete asynchronously.

           Parameters
           ----------
           kernel : kernel
              Kernel to be invoked; indexed as (name, pointer, device)
           args : OffloadArray, numpy.ndarray, or scalar type
              Arguments to be passed to the kernel function

           See Also
           --------
           load_library

           Returns
           -------
           None

           Raises
           ------
           OffloadError
              If the kernel's library was loaded on a different device.

           Examples
           --------
           >>> library = device.load_library("libdgemm")
           >>> stream.invoke(library.dgemm, A, B, C, n, m, k)
        """

        # a wrapper hands over the real arguments packed into one extra
        # tuple; peel that tuple off
        if len(args) and type(args[0]) == tuple:
            args = args[0]

        # safety check: the kernel's library must live on our device
        if kernel[2] is not self._device:
            raise OffloadError("Cannot invoke kernel, "
                               "library not loaded on device")

        # per-argument descriptors handed to the native layer; pointers
        # are stored as 64-bit integers in an ndarray
        n_args = len(args)
        arg_dims = numpy.empty((n_args,), dtype=numpy.int64)
        arg_type = numpy.empty((n_args,), dtype=numpy.int64)
        arg_ptrs = numpy.empty((n_args,), dtype=numpy.int64)
        arg_size = numpy.empty((n_args,), dtype=numpy.int64)
        copy_in_out = []   # (host pointer, device pointer, nbytes, array)
        scalars = []       # keeps the wrapped scalars referenced
        for idx, arg in enumerate(args):
            if isinstance(arg, pymic.OffloadArray):
                # data already lives on the device: pass its fake pointer
                dims = 1
                dtype = arg.dtype
                pointer = arg._device_ptr._device_ptr
                nbytes = arg._nbytes
            elif isinstance(arg, numpy.ndarray):
                # allocate a device buffer on the target of the invoke and
                # mark the array for copyin/copyout semantics
                host_ptr = arg.ctypes.data  # raw C pointer to host data
                nbytes = arg.dtype.itemsize * arg.size
                dev_ptr = self.allocate_device_memory(nbytes)
                copy_in_out.append((host_ptr, dev_ptr, nbytes, arg))
                dims = 1
                dtype = arg.dtype
                pointer = dev_ptr._device_ptr  # fake pointer
            else:
                # this is a hack, but let's wrap scalars as numpy arrays
                wrapped = numpy.asarray(arg)
                scalars.append(wrapped)
                dims = 0
                dtype = wrapped.dtype
                pointer = wrapped.ctypes.data  # raw C pointer to host data
                nbytes = wrapped.dtype.itemsize * wrapped.size
            arg_dims[idx] = dims
            arg_type[idx] = map_data_types(dtype)
            arg_ptrs[idx] = pointer
            arg_size[idx] = nbytes

        kernel_name = kernel[0]
        kernel_ptr = kernel[1]
        debug(1, "(device {0}, stream 0x{1:x}) invoking kernel '{2}' "
                 "(pointer 0x{3:x}) with {4} "
                 "argument(s) ({5} copy-in/copy-out, {6} scalars)",
              self._device_id, self._stream_id, kernel_name, kernel_ptr,
              n_args, len(copy_in_out), len(scalars))

        # copy-in: push every marked array to its device buffer
        for host_ptr, dev_ptr, nbytes, _ in copy_in_out:
            self.transfer_host2device(host_ptr, dev_ptr, nbytes)

        _pymic_impl_invoke_kernel(self._device_id, self._stream_id,
                                  kernel_ptr, arg_dims, arg_type, arg_ptrs,
                                  arg_size, n_args)

        # copy-out: fetch the results and release the temporary buffers
        for host_ptr, dev_ptr, nbytes, _ in copy_in_out:
            self.transfer_device2host(dev_ptr, host_ptr, nbytes)
            self.deallocate_device_memory(dev_ptr)
        return None
Example #15
0
 def __del__(self):
     """Destroy the native stream when this object is finalized."""
     stream_id = self._stream_id
     device_id = self._device_id
     message = ('destroying stream 0x{0:0x} '
                'for device {1}').format(stream_id, device_id)
     debug(1, message)
     _pymic_impl_stream_destroy(device_id, stream_id)
Example #16
0
    def invoke(self, kernel, *args):
        """Invoke a native kernel on the target device by enqueuing a request
           in the current stream.  The kernel is obtained by accessing the
           attribute of a loaded library that has the same name as the
           kernel function; the library must have been loaded with
           load_library on the device of this stream before invoke.

           Each additional argument is handled according to its type:

           * OffloadArray: the device pointer of the array is passed.
           * numpy.ndarray: a temporary device buffer is allocated, the
             data is transferred to the device before the kernel runs and
             transferred back afterwards; the buffer is then released.
           * anything else: the value is wrapped with numpy.asarray and
             passed by its host address.

           All operations (copy in/copy out and invocation) are enqueued into
           the stream object and complete asynchronously.

           Parameters
           ----------
           kernel : kernel
              Kernel to be invoked; indexed as (name, pointer, device)
           args : OffloadArray, numpy.ndarray, or scalar type
              Arguments to be passed to the kernel function

           See Also
           --------
           load_library

           Returns
           -------
           None

           Raises
           ------
           OffloadError
              If the kernel's library was loaded on a different device.

           Examples
           --------
           >>> library = device.load_library("libdgemm")
           >>> stream.invoke(library.dgemm, A, B, C, n, m, k)
        """

        # a wrapper hands over the real arguments packed into one extra
        # tuple; peel that tuple off
        if len(args) and type(args[0]) == tuple:
            args = args[0]

        # safety check: the kernel's library must live on our device
        if kernel[2] is not self._device:
            raise OffloadError("Cannot invoke kernel, "
                               "library not loaded on device")

        # per-argument descriptors handed to the native layer; pointers
        # are stored as 64-bit integers in an ndarray
        n_args = len(args)
        arg_dims = numpy.empty((n_args,), dtype=numpy.int64)
        arg_type = numpy.empty((n_args,), dtype=numpy.int64)
        arg_ptrs = numpy.empty((n_args,), dtype=numpy.int64)
        arg_size = numpy.empty((n_args,), dtype=numpy.int64)
        copy_in_out = []   # (host pointer, device pointer, nbytes, array)
        scalars = []       # keeps the wrapped scalars referenced
        for idx, arg in enumerate(args):
            if isinstance(arg, pymic.OffloadArray):
                # data already lives on the device: pass its fake pointer
                dims = 1
                dtype = arg.dtype
                pointer = arg._device_ptr._device_ptr
                nbytes = arg._nbytes
            elif isinstance(arg, numpy.ndarray):
                # allocate a device buffer on the target of the invoke and
                # mark the array for copyin/copyout semantics
                host_ptr = arg.ctypes.data  # raw C pointer to host data
                nbytes = arg.dtype.itemsize * arg.size
                dev_ptr = self.allocate_device_memory(nbytes)
                copy_in_out.append((host_ptr, dev_ptr, nbytes, arg))
                dims = 1
                dtype = arg.dtype
                pointer = dev_ptr._device_ptr  # fake pointer
            else:
                # this is a hack, but let's wrap scalars as numpy arrays
                wrapped = numpy.asarray(arg)
                scalars.append(wrapped)
                dims = 0
                dtype = wrapped.dtype
                pointer = wrapped.ctypes.data  # raw C pointer to host data
                nbytes = wrapped.dtype.itemsize * wrapped.size
            arg_dims[idx] = dims
            arg_type[idx] = map_data_types(dtype)
            arg_ptrs[idx] = pointer
            arg_size[idx] = nbytes

        kernel_name = kernel[0]
        kernel_ptr = kernel[1]
        debug(1, "(device {0}, stream 0x{1:x}) invoking kernel '{2}' "
                 "(pointer 0x{3:x}) with {4} "
                 "argument(s) ({5} copy-in/copy-out, {6} scalars)",
              self._device_id, self._stream_id, kernel_name, kernel_ptr,
              n_args, len(copy_in_out), len(scalars))

        # copy-in: push every marked array to its device buffer
        for host_ptr, dev_ptr, nbytes, _ in copy_in_out:
            self.transfer_host2device(host_ptr, dev_ptr, nbytes)

        _pymic_impl_invoke_kernel(self._device_id, self._stream_id,
                                  kernel_ptr, arg_dims, arg_type, arg_ptrs,
                                  arg_size, n_args)

        # copy-out: fetch the results and release the temporary buffers
        for host_ptr, dev_ptr, nbytes, _ in copy_in_out:
            self.transfer_device2host(dev_ptr, host_ptr, nbytes)
            self.deallocate_device_memory(dev_ptr)
        return None
Example #17
0
    def transfer_device2device(self,
                               device_ptr_src,
                               device_ptr_dst,
                               nbytes,
                               offset_device_src=0,
                               offset_device_dst=0):
        """Transfer data from a device memory location (identified by its
           fake pointer) to another memory region on the same device. The
           operation is executed asynchronously with stream semantics.

           Caution: this is a low-level function, do not use it unless you
                    have a very specific reason to do so.  Better use the
                    high-level interfaces of OffloadArray instead.

           Parameters
           ----------
           device_ptr_src : SmartPtr
              Fake pointer to the source memory location
           device_ptr_dst : SmartPtr
              Fake pointer to the destination memory location
           nbytes : int
              Number of bytes to copy; must be greater than zero
           offset_device_src : int, optional, default 0
              Transfer offset (bytes) to be added to the address of the device
              memory (source).
           offset_device_dst : int, optional, default 0
              Transfer offset (bytes) to be added to the address of the device
              memory (destination).

           See Also
           --------
           transfer_host2device, allocate_device_memory,
           deallocate_device_memory

           Returns
           -------
           None

           Raises
           ------
           ValueError
              If a pointer is None or not a SmartPtr, an offset is
              negative, or nbytes is not positive.

           Examples
           --------
           >>> a = numpy.arange(0.0, 16.0)
           >>> nbytes = a.dtype.itemsize * a.size
           >>> ptr_a_host = a.ctypes.data
           >>> device_ptr_1 = stream.allocate_device_memory(nbytes)
           >>> stream.transfer_host2device(ptr_a_host, device_ptr_1, nbytes)
           >>> device_ptr_2 = stream.allocate_device_memory(nbytes)
           >>> stream.transfer_device2device(device_ptr_1, device_ptr_2,
                                             nbytes)
           >>> b = numpy.empty_like(a)
           >>> print b
           [  6.95303066e-310   6.83874600e-317   3.95252517e-322
              0.00000000e+000   9.31741387e+242   0.00000000e+000
              0.00000000e+000   0.00000000e+000   4.94065646e-324
              3.30519641e-317   1.72409659e+212   1.20070123e-089
              5.05907223e-085   4.87883721e+199   0.00000000e+000
              6.78545805e-317]
           # random data
           >>> ptr_b_host = b.ctypes.data
           >>> stream.transfer_device2host(device_ptr_2, ptr_b_host, nbytes)
           >>> stream.sync()
           >>> print b
           [  0.   1.   2.   3.   4.   5.   6.   7.   8.   9.  10.  11.
             12.  13.  14.  15.]
        """

        # The None checks must run before the isinstance checks; otherwise
        # None is already rejected as "no device pointer" and the more
        # specific message can never be produced.
        if device_ptr_src is None:
            raise ValueError('Invalid None device pointer')
        if device_ptr_dst is None:
            raise ValueError('Invalid None device pointer')
        if not isinstance(device_ptr_src, SmartPtr):
            raise ValueError('Wrong argument, no device pointer given')
        if not isinstance(device_ptr_dst, SmartPtr):
            raise ValueError('Wrong argument, no device pointer given')
        # TODO: add more safety checks here (e.g., pointer from right device
        #       and stream)
        if offset_device_src < 0:
            raise ValueError("Negative offset passed for offset_device_src")
        if offset_device_dst < 0:
            raise ValueError("Negative offset passed for offset_device_dst")
        if nbytes <= 0:
            raise ValueError('Invalid byte count: {0}'.format(nbytes))

        # NOTE(review): an offset stored inside the SmartPtr arguments
        # (`_offset`) is not taken into account here -- confirm that this is
        # intended.

        # from here on work with the raw fake pointers
        device_ptr_src = device_ptr_src._device_ptr
        device_ptr_dst = device_ptr_dst._device_ptr
        debug(
            1, '(device {0} -> device {0}) transferring {1} bytes '
            '(source ptr {2}, destination ptr {3})', self._device_id, nbytes,
            device_ptr_src, device_ptr_dst)
        _pymic_impl_stream_memcpy_d2d(self._device_id, self._stream_id,
                                      device_ptr_src, device_ptr_dst, nbytes,
                                      offset_device_src, offset_device_dst)
        return None
Example #18
0
    def transfer_device2host(self,
                             device_ptr,
                             host_ptr,
                             nbytes,
                             offset_device=0,
                             offset_host=0):
        """Transfer data from a device memory location (identified by its
           fake pointer) to a host memory region identified by its raw
           pointer (i.e., a C pointer). The operation is executed
           asynchronously with stream semantics.

           Caution: this is a low-level function, do not use it unless you
                    have a very specific reason to do so.  Better use the
                    high-level interfaces of OffloadArray instead.

           Parameters
           ----------
           device_ptr : SmartPtr
              Fake pointer of the source data on the device
           host_ptr : int
              Raw pointer to the destination memory on the host
           nbytes : int
              Number of bytes to copy; must be greater than zero
           offset_device : int, optional, default 0
              Transfer offset (bytes) to be added to the address of the device
              memory.  Must stay 0 if device_ptr itself carries an offset;
              in that case the offset of device_ptr is used.
           offset_host : int, optional, default 0
              Transfer offset (bytes) to be added to raw host pointer

           See Also
           --------
           transfer_host2device, transfer_device2device,
           allocate_device_memory, deallocate_device_memory

           Returns
           -------
           None

           Raises
           ------
           ValueError
              If a pointer is None, device_ptr is not a SmartPtr, an offset
              is negative, nbytes is not positive, or both device_ptr and
              offset_device specify a non-zero offset.

           Examples
           --------
           >>> a = numpy.arange(0.0, 16.0)
           >>> nbytes = a.dtype.itemsize * a.size
           >>> ptr_a_host = a.ctypes.data
           >>> device_ptr = stream.allocate_device_memory(nbytes)
           >>> stream.transfer_host2device(ptr_a_host, device_ptr, nbytes)
           >>> b = numpy.empty_like(a)
           >>> print b
           [  6.90762927e-310   7.73120247e-317   3.60667921e-322
              0.00000000e+000   0.00000000e+000   0.00000000e+000
              0.00000000e+000   0.00000000e+000   4.94065646e-324
              9.76815212e-317   7.98912845e-317   0.00000000e+000
              5.53353523e-322   1.58101007e-322   0.00000000e+000
              7.38839996e-317]
           # random data
           >>> ptr_b_host = b.ctypes.data
           >>> stream.transfer_device2host(device_ptr, ptr_b_host, nbytes)
           >>> stream.sync()
           >>> print b
           [  0.   1.   2.   3.   4.   5.   6.   7.   8.   9.  10.  11.
             12.  13.  14.  15.]
        """

        # The None check must run before the isinstance check; otherwise
        # None is already rejected as "no device pointer" and the more
        # specific message can never be produced.
        if device_ptr is None:
            raise ValueError('Invalid None device pointer')
        if not isinstance(device_ptr, SmartPtr):
            raise ValueError('Wrong argument, no device pointer given')
        # TODO: add more safety checks here (e.g., pointer from right device
        #       and stream)
        if offset_device < 0:
            raise ValueError("Negative offset passed for offset_device")
        if offset_host < 0:
            raise ValueError("Negative offset passed for offset_host")
        if host_ptr is None:
            raise ValueError('Invalid None host pointer')
        if nbytes <= 0:
            raise ValueError('Invalid byte count: {0}'.format(nbytes))
        # an offset may come from the SmartPtr or from the caller, not both
        if device_ptr._offset != 0:
            if offset_device != 0:
                raise ValueError('Offset cannot be non-zero if fake pointer '
                                 'has an offset.')
            offset_device = device_ptr._offset

        debug(
            1, '(device {0} -> host) transferring {1} bytes '
            '(device ptr {2}, host ptr 0x{3:x})', self._device_id, nbytes,
            device_ptr, host_ptr)
        # the native layer expects the raw fake pointer
        device_ptr = device_ptr._device_ptr
        _pymic_impl_stream_memcpy_d2h(self._device_id, self._stream_id,
                                      device_ptr, host_ptr, nbytes,
                                      offset_device, offset_host)
        return None
Example #19
0
 def __del__(self):
     """Destroy the native stream when this object is finalized."""
     stream_id = self._stream_id
     device_id = self._device_id
     message = ('destroying stream 0x{0:0x} '
                'for device {1}').format(stream_id, device_id)
     debug(1, message)
     _pymic_impl_stream_destroy(device_id, stream_id)
Example #20
0
        traceback = inspect.getframeinfo(current_frame)
        stack.append((traceback.function,
                     (traceback.filename, traceback.lineno)))
        current_frame = current_frame.f_back
    return stack


def _trace_func(func):
    """Return a pass-through wrapper around ``func``.

    This is the do-nothing variant of the tracing decorator: the wrapper
    forwards all positional and keyword arguments to ``func`` and returns
    its result unchanged.

    Parameters
    ----------
    func : callable
        Function to wrap.

    Returns
    -------
    callable
        Wrapper that carries the name and docstring of ``func``.
    """
    # function-scope import so the decorator does not depend on the
    # module's import block
    import functools

    # this is the do-nothing-wrapper; functools.wraps keeps the metadata
    # of the decorated function visible to help() and introspection
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)
    return wrapper


# Report whether tracing is active.  Any level >= 1 counts as enabled, which
# matches the `config._trace_level >= 1` test that guards the tracing setup
# right after this statement; previously levels above 1 were reported as
# "disabled".  An unset level (None) is not reported at all.
if config._trace_level is not None:
    if config._trace_level >= 1:
        debug(5, "tracing is enabled", config._trace_level)
    else:
        debug(5, "tracing is disabled", config._trace_level)

if config._trace_level >= 1:
    _stack_walk_func = _stack_walk_compact
    if config._collect_stacks_str.lower() == "none":
        debug(5, "stack collection is set to '{0}'", 
              config._collect_stacks_str)
        _stack_walk_func = _stack_walk_none
    elif config._collect_stacks_str.lower() == "compact":
        debug(5, "stack collection is set to '{0}'", 
              config._collect_stacks_str)
        _stack_walk_func = _stack_walk_compact
    elif config._collect_stacks_str.lower() == "full":
        debug(5, "stack collection is set to '{0}'", 
Example #21
0
        stack.append(
            (traceback.function, (traceback.filename, traceback.lineno)))
        current_frame = current_frame.f_back
    return stack


def _trace_func(func):
    """Return a pass-through wrapper around ``func``.

    This is the do-nothing variant of the tracing decorator: the wrapper
    forwards all positional and keyword arguments to ``func`` unchanged
    and returns its result.  ``functools.wraps`` copies the metadata of
    ``func`` (name, docstring, ...) onto the wrapper so that decorated
    functions keep their identity for ``help()`` and introspection.
    """
    # local import keeps the block self-contained
    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)

    return wrapper


# Report the tracing state.  Nothing is logged when no trace level has been
# configured (None).
if config._trace_level is not None:
    # Use the same ">= 1" threshold as the tracing setup that follows, so
    # that levels above 1 are not reported as "disabled" while tracing is
    # actually being set up.
    if config._trace_level >= 1:
        debug(5, "tracing is enabled", config._trace_level)
    else:
        debug(5, "tracing is disabled", config._trace_level)

if config._trace_level >= 1:
    _stack_walk_func = _stack_walk_compact
    if config._collect_stacks_str.lower() == "none":
        debug(5, "stack collection is set to '{0}'",
              config._collect_stacks_str)
        _stack_walk_func = _stack_walk_none
    elif config._collect_stacks_str.lower() == "compact":
        debug(5, "stack collection is set to '{0}'",
              config._collect_stacks_str)
        _stack_walk_func = _stack_walk_compact
    elif config._collect_stacks_str.lower() == "full":
        debug(5, "stack collection is set to '{0}'",
Example #22
0
    def transfer_device2device(self, device_ptr_src, device_ptr_dst,
                               nbytes, offset_device_src=0,
                               offset_device_dst=0):
        """Transfer data from a device memory location (identified by its
           fake pointer) to another memory region on the same device. The
           operation is executed asynchronously with stream semantics.

           Caution: this is a low-level function, do not use it unless you
                    have a very specific reason to do so.  Better use the
                    high-level interfaces of OffloadArray instead.

           Parameters
           ----------
           device_ptr_src : SmartPtr
              Fake pointer to the source memory location
           device_ptr_dst : SmartPtr
              Fake pointer to the destination memory location
           nbytes : int
              Number of bytes to copy (must be positive)
           offset_device_src : int, optional, default 0
              Transfer offset (bytes) to be added to the address of the device
              memory (source).
           offset_device_dst : int, optional, default 0
              Transfer offset (bytes) to be added to the address of the device
              memory (destination).

           See Also
           --------
           transfer_host2device, transfer_device2host,
           allocate_device_memory, deallocate_device_memory

           Returns
           -------
           None

           Raises
           ------
           ValueError
              If one of the pointers is None or not a SmartPtr, if one of
              the offsets is negative, or if nbytes is not positive.

           Examples
           --------
           >>> a = numpy.arange(0.0, 16.0)
           >>> nbytes = a.dtype.itemsize * a.size
           >>> ptr_a_host = a.ctypes.data
           >>> device_ptr_1 = stream.allocate_device_memory(nbytes)
           >>> stream.transfer_host2device(ptr_a_host, device_ptr_1, nbytes)
           >>> device_ptr_2 = stream.allocate_device_memory(nbytes)
           >>> stream.transfer_device2device(device_ptr_1, device_ptr_2,
                                             nbytes)
           >>> b = numpy.empty_like(a)
           >>> print(b)
           [  6.95303066e-310   6.83874600e-317   3.95252517e-322
              0.00000000e+000   9.31741387e+242   0.00000000e+000
              0.00000000e+000   0.00000000e+000   4.94065646e-324
              3.30519641e-317   1.72409659e+212   1.20070123e-089
              5.05907223e-085   4.87883721e+199   0.00000000e+000
              6.78545805e-317]
           # random data
           >>> ptr_b_host = b.ctypes.data
           >>> stream.transfer_device2host(device_ptr_2, ptr_b_host, nbytes)
           >>> stream.sync()
           >>> print(b)
           [  0.   1.   2.   3.   4.   5.   6.   7.   8.   9.  10.  11.
             12.  13.  14.  15.]
        """

        # The None checks have to come before the isinstance checks;
        # otherwise None is always rejected by isinstance first and the
        # dedicated messages are never reached.
        if device_ptr_src is None:
            raise ValueError('Invalid None device pointer')
        if device_ptr_dst is None:
            raise ValueError('Invalid None device pointer')
        if not isinstance(device_ptr_src, SmartPtr):
            raise ValueError('Wrong argument, no device pointer given')
        # TODO: add more safety checks here (e.g., pointer from right device
        #       and stream)
        if not isinstance(device_ptr_dst, SmartPtr):
            raise ValueError('Wrong argument, no device pointer given')
        # TODO: add more safety checks here (e.g., pointer from right device
        #       and stream)
        if offset_device_src < 0:
            raise ValueError("Negative offset passed for offset_device_src")
        if offset_device_dst < 0:
            raise ValueError("Negative offset passed for offset_device_dst")
        if nbytes <= 0:
            raise ValueError('Invalid byte count: {0}'.format(nbytes))

        # NOTE(review): unlike transfer_device2host, an offset stored in the
        # fake pointer itself (``_offset``) is not applied here -- confirm
        # whether that is intended.
        # unwrap the fake pointers into the values handed to the low-level
        # copy routine
        device_ptr_src = device_ptr_src._device_ptr
        device_ptr_dst = device_ptr_dst._device_ptr
        debug(1, '(device {0} -> device {0}) transferring {1} bytes '
                 '(source ptr {2}, destination ptr {3})',
                 self._device_id, nbytes, device_ptr_src, device_ptr_dst)
        _pymic_impl_stream_memcpy_d2d(self._device_id, self._stream_id,
                                      device_ptr_src, device_ptr_dst,
                                      nbytes, offset_device_src,
                                      offset_device_dst)
        return None
Example #23
0
    def transfer_device2host(self, device_ptr, host_ptr,
                             nbytes, offset_device=0, offset_host=0):
        """Transfer data from a device memory location (identified by its
           fake pointer) on the target device to a host memory region
           identified by its raw pointer (i.e., a C pointer). The operation
           is executed asynchronously with stream semantics.

           Caution: this is a low-level function, do not use it unless you
                    have a very specific reason to do so.  Better use the
                    high-level interfaces of OffloadArray instead.

           Parameters
           ----------
           device_ptr : SmartPtr
              Fake pointer to the source data on the device
           host_ptr : int
              Raw pointer to the destination memory on the host
           nbytes : int
              Number of bytes to copy (must be positive)
           offset_device : int, optional, default 0
              Transfer offset (bytes) to be added to the address of the device
              memory.  Must be 0 if the fake pointer itself carries an
              offset; in that case the offset of the fake pointer is used.
           offset_host : int, optional, default 0
              Transfer offset (bytes) to be added to raw host pointer

           See Also
           --------
           transfer_host2device, transfer_device2device,
           allocate_device_memory, deallocate_device_memory

           Returns
           -------
           None

           Raises
           ------
           ValueError
              If device_ptr is None or not a SmartPtr, if host_ptr is None,
              if one of the offsets is negative, if nbytes is not positive,
              or if both offset_device and the fake pointer's own offset
              are non-zero.

           Examples
           --------
           >>> a = numpy.arange(0.0, 16.0)
           >>> nbytes = a.dtype.itemsize * a.size
           >>> ptr_a_host = a.ctypes.data
           >>> device_ptr = stream.allocate_device_memory(nbytes)
           >>> stream.transfer_host2device(ptr_a_host, device_ptr, nbytes)
           >>> b = numpy.empty_like(a)
           >>> print(b)
           [  6.90762927e-310   7.73120247e-317   3.60667921e-322
              0.00000000e+000   0.00000000e+000   0.00000000e+000
              0.00000000e+000   0.00000000e+000   4.94065646e-324
              9.76815212e-317   7.98912845e-317   0.00000000e+000
              5.53353523e-322   1.58101007e-322   0.00000000e+000
              7.38839996e-317]
           # random data
           >>> ptr_b_host = b.ctypes.data
           >>> stream.transfer_device2host(device_ptr, ptr_b_host, nbytes)
           >>> stream.sync()
           >>> print(b)
           [  0.   1.   2.   3.   4.   5.   6.   7.   8.   9.  10.  11.
             12.  13.  14.  15.]
        """

        # The None check has to come before the isinstance check; otherwise
        # None is always rejected by isinstance first and the dedicated
        # message is never reached.
        if device_ptr is None:
            raise ValueError('Invalid None device pointer')
        if not isinstance(device_ptr, SmartPtr):
            raise ValueError('Wrong argument, no device pointer given')
        # TODO: add more safety checks here (e.g., pointer from right device
        #       and stream)
        if offset_device < 0:
            raise ValueError("Negative offset passed for offset_device")
        if offset_host < 0:
            raise ValueError("Negative offset passed for offset_host")
        if host_ptr is None:
            raise ValueError('Invalid None host pointer')
        if nbytes <= 0:
            raise ValueError('Invalid byte count: {0}'.format(nbytes))
        if device_ptr._offset != 0:
            # an offset stored in the fake pointer replaces the explicit
            # argument; passing both is ambiguous and therefore rejected
            if offset_device != 0:
                raise ValueError('Offset cannot be non-zero if fake pointer '
                                 'has an offset.')
            offset_device = device_ptr._offset

        # log the fake pointer object itself before unwrapping it
        debug(1, '(device {0} -> host) transferring {1} bytes '
                 '(device ptr {2}, host ptr 0x{3:x})',
                 self._device_id, nbytes, device_ptr, host_ptr)
        device_ptr = device_ptr._device_ptr
        _pymic_impl_stream_memcpy_d2h(self._device_id, self._stream_id,
                                      device_ptr, host_ptr,
                                      nbytes, offset_device, offset_host)
        return None