예제 #1
0
파일: pool.py 프로젝트: simonzack/joblib
    def __call__(self, a):
        m = _get_backing_memmap(a)
        if m is not None:
            # a is already backed by a memmap file, let's reuse it directly
            return _reduce_memmap_backed(a, m)

        if (not a.dtype.hasobject
                and self._max_nbytes is not None
                and a.nbytes > self._max_nbytes):
            # check that the folder exists (lazily create the pool temp folder
            # if required)
            try:
                os.makedirs(self._temp_folder)
                os.chmod(self._temp_folder, FOLDER_PERMISSIONS)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise e

            # Find a unique, concurrent safe filename for writing the
            # content of this array only once.
            if self._context_id is not None:
                marker = self._context_id
            else:
                marker = hash(a)
            basename = "%d-%d-%d-%s.pkl" % (
                os.getpid(), id(threading.current_thread()), id(a), marker)
            filename = os.path.join(self._temp_folder, basename)

            # In case the same array with the same content is passed several
            # times to the pool subprocess children, serialize it only once

            # XXX: implement an explicit reference counting scheme to make it
            # possible to delete temporary files as soon as the workers are
            # done processing this data.
            if not os.path.exists(filename):
                if self.verbose > 0:
                    print("Memmaping (shape=%r, dtype=%s) to new file %s" % (
                        a.shape, a.dtype, filename))
                for dumped_filename in dump(a, filename):
                    os.chmod(dumped_filename, FILE_PERMISSIONS)

                if self._prewarm:
                    # Warm up the data to avoid concurrent disk access in
                    # multiple children processes
                    load(filename, mmap_mode=self._mmap_mode).max()
            elif self.verbose > 1:
                print("Memmaping (shape=%s, dtype=%s) to old file %s" % (
                    a.shape, a.dtype, filename))

            # Let's use the memmap reducer
            return reduce_memmap(load(filename, mmap_mode=self._mmap_mode))
        else:
            # do not convert a into memmap, let pickler do its usual copy with
            # the default system pickler
            if self.verbose > 1:
                print("Pickling array (shape=%r, dtype=%s)." % (
                    a.shape, a.dtype))
            return (loads, (dumps(a, protocol=HIGHEST_PROTOCOL),))
예제 #2
0
파일: pool.py 프로젝트: simonzack/joblib
def reduce_memmap(a):
    """Pickle the descriptors of a memmap instance to reopen on same file"""
    m = _get_backing_memmap(a)
    if m is not None:
        # m is a real mmap backed memmap instance, reduce a preserving striding
        # information
        return _reduce_memmap_backed(a, m)
    else:
        # This memmap instance is actually backed by a regular in-memory
        # buffer: this can happen when using binary operators on numpy.memmap
        # instances
        return (loads, (dumps(np.asarray(a), protocol=HIGHEST_PROTOCOL),))