Example #1
 def _set_readahead_hints(self, roi, open_files):
     if not hasattr(os, 'posix_fadvise'):
         return
     if any([f.handle.fileno() is None for f in open_files]):
         return
     for f in open_files:
         os.posix_fadvise(f.handle.fileno(), 0, 0, os.POSIX_FADV_WILLNEED)
Example #2
 def __init__(self, reppath, context, ohash_log, refcount_log):
     self.context = context
     # Create a new opportunistic hash collection.
     self.col = opportunistic_hash.OpportunisticHashCollection(
         carvpathcontext=context, ohash_log=ohash_log)
     # We start off with zero open files
     self.openfiles = {}
     # Open the underlying data file and create if needed.
     self.fd = os.open(
         reppath, (os.O_RDWR | os.O_LARGEFILE | os.O_NOATIME | os.O_CREAT))
     # Get the current repository total size.
     cursize = os.lseek(self.fd, 0, os.SEEK_END)
     # Set the entire repository as dontneed and assume everything to be
     # cold data for now.
     posix_fadvise(self.fd, 0, cursize, POSIX_FADV_DONTNEED)
     # Create CarvPath top entity of the proper size.
     self.top = self.context.make_top(size=cursize)
     # Create fadvise functor from fd.
     fadvise = _FadviseFunctor(fd=self.fd)
     # Create a reference-counting carvpath stack using our fadvise functor
     # and ohash collection.
     self.stack = refcount_stack.CarvpathRefcountStack(
         carvpathcontext=self.context,
         fadvise=fadvise,
         ohashcollection=self.col,
         refcount_log=refcount_log)
Example #3
def _read(fd, fn, sequential, direct):
    try:
        if direct:
            if sequential is not None:
                fadv_sequential = os.POSIX_FADV_SEQUENTIAL
                fadv_random = os.POSIX_FADV_RANDOM
                advice = fadv_sequential if sequential else fadv_random
                os.posix_fadvise(fd, 0, 0, advice)

            def read(buf):
                data = fn(fd, buf)
                os.posix_fadvise(fd, read.offset, buf, os.POSIX_FADV_DONTNEED)
                read.offset += buf
                return data

            # NOTE: `nonlocal` statement is not available in Python 2.
            read.offset = 0

        else:
            raise AttributeError

    except AttributeError:
        def read(buf):
            return fn(fd, buf)

    return read, fd
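
A minimal usage sketch for the factory above (the path and the use of os.read as the fn callable are illustrative assumptions, not part of the original):

import os

fd = os.open("/tmp/sample.bin", os.O_RDONLY)     # hypothetical input file
# fn must accept (fd, nbytes); os.read has exactly that shape.
read, fd = _read(fd, os.read, sequential=True, direct=True)
chunk = read(64 * 1024)   # read 64 KiB, then the closure drops it from the page cache
os.close(fd)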
Example #4
 def _prefetch_for_tile(self, fileset, tile_ranges):
     prefr = _get_prefetch_ranges(len(fileset), tile_ranges)
     prefr = prefr[~np.all(prefr == 0, axis=1)]
     for mi, ma, fidx in prefr:
         f = fileset[fidx]
         os.posix_fadvise(f.handle.fileno(), mi, ma - mi,
                          os.POSIX_FADV_WILLNEED)
Example #5
 def __init__(self, reppath, context, ohash_log, refcount_log):
     self.context = context
     # Create a new opportunistic hash collection.
     self.col = opportunistic_hash.OpportunisticHashCollection(
                      carvpathcontext=context,
                      ohash_log=ohash_log)
     # We start off with zero open files
     self.openfiles = {}
     # Open the underlying data file and create if needed.
     self.fd = os.open(reppath,
                       (os.O_RDWR |
                        os.O_LARGEFILE |
                        os.O_NOATIME |
                        os.O_CREAT))
     # Get the current repository total size.
     cursize = os.lseek(self.fd, 0, os.SEEK_END)
     # Set the entire repository as dontneed and assume everything to be
     # cold data for now.
     posix_fadvise(self.fd, 0, cursize, POSIX_FADV_DONTNEED)
     # Create CarvPath top entity of the proper size.
     self.top = self.context.make_top(size=cursize)
     # Create fadvise functor from fd.
     fadvise = _FadviseFunctor(fd=self.fd)
     # Create a reference-counting carvpath stack using our fadvise functor
     # and ohash collection.
     self.stack = refcount_stack.CarvpathRefcountStack(
           carvpathcontext=self.context,
           fadvise=fadvise,
           ohashcollection=self.col,
           refcount_log=refcount_log)
Example #6
def raw_baseband_frames(file_name: str, buf: bytearray):
    """Iterates over frames in a raw baseband file"""
    with io.FileIO(file_name, "rb") as raw_file:
        while raw_file.readinto(buf):
            yield buf
        size = os.path.getsize(file_name)
        os.posix_fadvise(raw_file.fileno(), 0, size, os.POSIX_FADV_DONTNEED)
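
Since readinto() needs a writable buffer, a caller would pass a bytearray sized to one frame and treat each yielded value as the same reused buffer; a brief usage sketch (frame size, path and consumer are illustrative assumptions):

frame_size = 1024                                   # hypothetical frame length in bytes
buf = bytearray(frame_size)
for frame in raw_baseband_frames("capture.raw", buf):
    handle_frame(bytes(frame))                      # hypothetical consumer; copy, since buf is reused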
Example #7
    async def get(self):
        print("PID {} :getting file".format(os.getpid()))
        abs_path = os.path.abspath(self.get_argument('path'))

        if not os.access(abs_path, os.R_OK):
            raise web.HTTPError(status_code=404,
                                reason="File Not Found or File Access Denied")

        file_size = os.path.getsize(abs_path)
        content_type, _ = mimetypes.guess_type(abs_path)

        if not content_type:
            self.set_header('Content-Type', "application/octet-stream")
        else:
            self.set_header('Content-Type', content_type)

        self.add_header(
            'Content-Disposition',
            "attachment; filename={}".format(os.path.basename(abs_path)))
        self.add_header('Content-Length', file_size)

        chunk_size = 1024 * 1024 * 2

        async with aiofiles.open(
                abs_path,
                "rb",
                buffering=0,
                loop=ioloop.IOLoop.current(),
                executor=self.application.thread_executor) as fp:

            # No buffering in our address space; ask the kernel to cache the file aggressively
            if os.name == "posix":

                os.posix_fadvise(fp.fileno(), 0, file_size,
                                 os.POSIX_FADV_WILLNEED)

            while True:

                chunk = await fp.read(chunk_size)

                if not chunk:
                    break

                try:
                    self.write(chunk)
                    await self.flush()

                except iostream.StreamClosedError:
                    break

                finally:
                    del chunk

                    # Used for metering/limiting request bandwidth or forced context switching for fast networks
                    await asyncio.sleep(0.000000001)

        print("PID {} : sent file {}".format(os.getpid(),
                                             os.path.basename(abs_path)))
Example #8
 def sync(self):
     """
     Synchronize file contents. Everything written prior to sync() must become durable before anything written
     after sync().
     """
     self.fd.flush()
     fdatasync(self.fileno)
     if hasattr(os, 'posix_fadvise'):
         os.posix_fadvise(self.fileno, 0, 0, os.POSIX_FADV_DONTNEED)
Example #9
 def sync(self):
     """
     Synchronize file contents. Everything written prior to sync() must become durable before anything written
     after sync().
     """
     self.fd.flush()
     fdatasync(self.fileno)
     if hasattr(os, 'posix_fadvise'):
         os.posix_fadvise(self.fileno, 0, 0, os.POSIX_FADV_DONTNEED)
Example #10
    def write(self, block, data):
        if not self._writer:
            self._writer = open(self.io_name, 'rb+')

        offset = block.id * self._block_size
        self._writer.seek(offset)
        written = self._writer.write(data)
        os.posix_fadvise(self._writer.fileno(), offset, len(data), os.POSIX_FADV_DONTNEED)
        assert written == len(data)
Example #11
def try_advise(file, offset, length):
    """Try to advise the OS on what file data is needed next"""
    try:
        if hasattr(file, "fileno"):
            posix_fadvise(file.fileno(),
                          offset,
                          length,
                          POSIX_FADV_WILLNEED)
    except Exception as ex:
        print(ex, file=sys.stderr, flush=True)
Example #12
    def nbd_client(self, version_uid):
        self.subprocess_run(args=[
            'sudo', 'nbd-client', '127.0.0.1', '-p',
            str(self.SERVER_PORT), '-l'
        ],
                            success_regexp='^Negotiation: ..\n{}\n$'.format(
                                version_uid[0].v_string))

        version_uid, size = version_uid
        self.subprocess_run(
            args=[
                'sudo', 'nbd-client', '-N', version_uid.v_string, '127.0.0.1',
                '-p',
                str(self.SERVER_PORT), self.NBD_DEVICE
            ],
            success_regexp=
            r'^Negotiation: ..size = \d+MB\nbs=1024, sz=\d+ bytes\n$|^Negotiation: ..size = \d+MB|Connected /dev/nbd\d+$'
        )

        count = 0
        nbd_data = bytearray()
        with open(self.NBD_DEVICE, 'rb') as f:
            while True:
                data = f.read(64 * 1024 + random.randint(0, 8192))
                if not data:
                    break
                count += len(data)
                nbd_data += data
        self.assertEqual(size, count)

        image_data = self.read_file(self.testpath.path + '/image')
        logger.info('image_data size {}, nbd_data size {}'.format(
            len(image_data), len(nbd_data)))
        self.assertEqual(image_data, bytes(nbd_data))

        f = os.open(self.NBD_DEVICE, os.O_RDWR)
        for offset in range(0, size, 4096):
            os.lseek(f, offset, os.SEEK_SET)
            data = self.random_bytes(4096)
            written = os.write(f, data)
            os.fsync(f)
            self.assertEqual(len(data), written)
            # Discard cache so that the read request below really goes to the NBD server
            os.posix_fadvise(f, offset, len(data), os.POSIX_FADV_DONTNEED)

            os.lseek(f, offset, os.SEEK_SET)
            read_data = os.read(f, 4096)
            self.assertEqual(data, read_data)
        os.close(f)

        self.subprocess_run(args=['sudo', 'nbd-client', '-d', self.NBD_DEVICE],
                            success_regexp='^disconnect, sock, done\n$')

        # Signal NBD server to stop
        self.nbd_server.stop()
Example #13
 def _open(self):
     wrapper = super()._open()
     try:
         fd = wrapper.fileno()
         os.posix_fadvise(fd, 0, 0, os.POSIX_FADV_DONTNEED)
     except Exception:
         # in case either file descriptor cannot be retrieved or fadvise is not available
         # we should simply return the wrapper retrieved by FileHandler's open method
         # the advise to the kernel is just an advise and if we cannot give it, we won't
         pass
     return wrapper
Example #14
 def close_segment(self):
     if self._write_fd:
         self.segment += 1
         self.offset = 0
         self._write_fd.flush()
         os.fsync(self._write_fd.fileno())
         if hasattr(os, 'posix_fadvise'):  # only on UNIX
             # tell the OS that it does not need to cache what we just wrote,
             # avoids spoiling the cache for the OS and other processes.
             os.posix_fadvise(self._write_fd.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)
         self._write_fd.close()
         self._write_fd = None
Example #15
 def _set_readahead_hints(self, roi, fileset):
     if not hasattr(os, 'posix_fadvise'):
         return
     if roi is None:
         for f in fileset:
             os.posix_fadvise(
                 f.fileno(), 0, 0,
                 os.POSIX_FADV_SEQUENTIAL | os.POSIX_FADV_WILLNEED)
     else:
         for f in fileset:
             os.posix_fadvise(f.fileno(), 0, 0,
                              os.POSIX_FADV_RANDOM | os.POSIX_FADV_WILLNEED)
Example #16
 def close_segment(self):
     if self._write_fd:
         self.segment += 1
         self.offset = 0
         self._write_fd.flush()
         os.fsync(self._write_fd.fileno())
         if hasattr(os, 'posix_fadvise'):  # python >= 3.3, only on UNIX
             # tell the OS that it does not need to cache what we just wrote,
             # avoids spoiling the cache for the OS and other processes.
             os.posix_fadvise(self._write_fd.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)
         self._write_fd.close()
         self._write_fd = None
Example #17
def update(path, value, timestamp=None):
    """
  update(path, value, timestamp=None)

  path is a string
  value is a float
  timestamp is either an int or float
  """
    value = float(value)
    with open(path, 'r+b', BUFFERING) as fh:
        if CAN_FADVISE and FADVISE_RANDOM:
            posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM)
        return file_update(fh, value, timestamp)
Example #18
def update(path, value, timestamp=None):
  """
  update(path, value, timestamp=None)

  path is a string
  value is a float
  timestamp is either an int or float
  """
  value = float(value)
  with open(path, 'r+b', BUFFERING) as fh:
    if CAN_FADVISE and FADVISE_RANDOM:
      posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM)
    return file_update(fh, value, timestamp)
Example #19
 def _set_readahead_hints(self, roi, fileset):
     if not hasattr(os, 'posix_fadvise'):
         return
     if any([f.fileno() is None
             for f in fileset]):
         return
     for f in fileset:
         os.posix_fadvise(
             f.fileno(),
             0,
             0,
             os.POSIX_FADV_WILLNEED
         )
Example #20
 def __getstate__(self):
     prefix = id(self)
     for idx, (filename, (offset,
                          size)) in enumerate(self.file_chunks.items()):
         try:
             fd = os.open(filename, os.O_RDONLY)  # os.open() takes flag bits, not a mode string
             posix_fadvise(fd, offset, size, POSIX_FADV_SEQUENTIAL)
             os.close(fd)
         except Exception:
             pass
         _, attacher = file_attachment(filename, offset, size, False)
         key = struct.pack('NN', prefix, idx)
         attach(key, attacher)
     return {'prefix': prefix, 'N': idx + 1}
Example #21
def safe_fadvise(fd, offset, len, advice):
    if hasattr(os, 'posix_fadvise'):
        advice = getattr(os, 'POSIX_FADV_' + advice)
        try:
            os.posix_fadvise(fd, offset, len, advice)
        except OSError:
            # usually, posix_fadvise can't fail for us, but there seem to
            # be failures when running borg under docker on ARM, likely due
            # to a bug outside of borg.
            # also, there is a python wrapper bug, always giving errno = 0.
            # https://github.com/borgbackup/borg/issues/2095
            # as this call is not critical for correct function (just to
            # optimize cache usage), we ignore these errors.
            pass
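
safe_fadvise() takes the advice as the suffix of the corresponding os.POSIX_FADV_* constant; a short usage sketch (the file and descriptor handling are illustrative assumptions):

import os

fd = os.open("/tmp/scratch.dat", os.O_RDONLY)   # hypothetical file
safe_fadvise(fd, 0, 0, 'SEQUENTIAL')            # hint that we will read it front to back
data = os.read(fd, 1 << 20)
safe_fadvise(fd, 0, 0, 'DONTNEED')              # then ask the kernel to drop it from the cache
os.close(fd)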
Example #22
def update_many(path, points):
  """update_many(path,points)

path is a string
points is a list of (timestamp,value) points
"""
  if not points:
    return
  points = [(int(t), float(v)) for (t, v) in points]
  points.sort(key=lambda p: p[0], reverse=True)  # Order points by timestamp, newest first
  with open(path, 'r+b', BUFFERING) as fh:
    if CAN_FADVISE and FADVISE_RANDOM:
      posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM)
    return file_update_many(fh, points)
Example #23
def safe_fadvise(fd, offset, len, advice):
    if hasattr(os, 'posix_fadvise'):
        advice = getattr(os, 'POSIX_FADV_' + advice)
        try:
            os.posix_fadvise(fd, offset, len, advice)
        except OSError:
            # usually, posix_fadvise can't fail for us, but there seem to
            # be failures when running borg under docker on ARM, likely due
            # to a bug outside of borg.
            # also, there is a python wrapper bug, always giving errno = 0.
            # https://github.com/borgbackup/borg/issues/2095
            # as this call is not critical for correct function (just to
            # optimize cache usage), we ignore these errors.
            pass
Example #24
 def ncc(self):
     '''Low-level. Calls ncc binary for appropriate platform, returns raw output as string.
     Behaves like "no-cache-cat" (ncc) but in pure-python. Only works on Unix, possibly Linux.
     If this does not work correctly it is a silent failure; self-testing is essential
     to ensure that non-caching reads are executed successfully. If not, fallback to
     custom compiled C binaries would be necessary to get readouts.'''
     if not self.ready:
         raise OpenPCRError("Device not ready, cannot read status.")
     filen = os.path.join(self.devicepath, 'STATUS.TXT')
     with open(filen, "rb") as InF:
         os.posix_fadvise(InF.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)
         fc = InF.read()
     # Return until first null character.
     # Odd null/whitespace pattern is incompatible with unicode mode.
     return fc.split(b"\0", 1)[0].decode()
Example #25
 def ncc(self):
     """Low-level. Calls ncc binary for appropriate platform, returns raw output as string.
     Behaves like "no-cache-cat" (ncc) but in pure-python. Only works on Unix, possibly Linux.
     If this does not work correctly it is a silent failure; self-testing is essential
     to ensure that non-caching reads are executed successfully. If not, fallback to
     custom compiled C binaries would be necessary to get readouts."""
     if not self.ready:
         raise OpenPCRError("Device not ready, cannot read status.")
     filen = os.path.join(self.devicepath, "STATUS.TXT")
     with open(filen, "rb") as InF:
         os.posix_fadvise(InF.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)
         fc = InF.read()
     # Return until first null character.
     # Odd null/whitespace pattern is incompatible with unicode mode.
     return fc.split(b"\0", 1)[0].decode()
Example #26
def update_many(path, points):
    """update_many(path,points)

path is a string
points is a list of (timestamp,value) points
"""
    if not points:
        return
    points = [(int(t), float(v)) for (t, v) in points]
    points.sort(key=lambda p: p[0],
                reverse=True)  # Order points by timestamp, newest first
    with open(path, 'r+b', BUFFERING) as fh:
        if CAN_FADVISE and FADVISE_RANDOM:
            posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM)
        return file_update_many(fh, points)
Example #27
def hash_file(fn: Callable[[Union[bytes, memoryview]], None], fd: int,
              size: int, offset: int) -> int:
    """Repeatedly call a function on a slice of a file."""
    buffsize = _BUFFSIZE
    done = 0

    os.posix_fadvise(fd, offset, size, os.POSIX_FADV_SEQUENTIAL)

    if hasattr(os, "preadv"):  # pragma: py-lt-37
        preadv = cast(
            Callable[[int, List[bytearray], int], int],
            getattr(os, "preadv"),  # noqa: B009
        )

        buff = bytearray(buffsize)
        bufflist = [buff]
        view = memoryview(buff)

        while size > 0:
            n = preadv(fd, bufflist, offset)

            n = min(n, size)

            if n < buffsize:
                fn(view[:n])
            else:
                fn(view)

            done += n
            size -= n
            offset += n
    else:  # Python <= 3.6
        while size > 0:
            data = os.pread(fd, buffsize, offset)
            datasize = len(data)

            n = min(datasize, size)

            if n < datasize:
                fn(data[:n])
            else:
                fn(data)

            done += n
            size -= n
            offset += n

    return done
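
Because fn only has to accept a bytes-like slice, a hashlib object's update method can be passed in directly; a minimal sketch (the path is an illustrative assumption, and _BUFFSIZE is assumed to be defined at module level as in the original):

import hashlib
import os

fd = os.open("payload.bin", os.O_RDONLY)        # hypothetical file
size = os.fstat(fd).st_size
h = hashlib.sha256()
done = hash_file(h.update, fd, size, 0)         # hash the whole file starting at offset 0
os.close(fd)
assert done == size
print(h.hexdigest())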
Example #28
    def _write(self, block: DereferencedBlock, data: bytes) -> DereferencedBlock:
        offset = block.id * self._block_size
        t1 = time.time()
        with open(self._path, 'rb+') as f:
            f.seek(offset)
            written = f.write(data)
            os.posix_fadvise(f.fileno(), offset, len(data), os.POSIX_FADV_DONTNEED)
        t2 = time.time()

        logger.debug('{} wrote block {} in {:.2f}s'.format(
            threading.current_thread().name,
            block.id,
            t2 - t1,
        ))

        assert written == len(data)
        return block
Example #29
File: hdf5.py Project: ifm/nexxT
    def onPortDataChanged(self, port):
        """
        Called when new data arrives at a port.

        :param port: the port where the new data is available.
        :return:
        """
        if self._currentFile is None:
            # recording not active -> do nothing
            return
        s = self._currentFile["streams"][port.name()]
        sample = port.getData()

        # perform timestamp calculations
        if s.shape[0] > 0:
            lastDataTimestamp = self._lastDataTimestamp
            lastRcvTimestamp = self._lastRcvTimestamp
        else:
            lastDataTimestamp = sample.getTimestamp()
            lastRcvTimestamp = 0
        if self._useRcvTimestamps:
            rcvTimestamp = np.int64(time.perf_counter_ns() -
                                    self._basetime) / 1000
        else:
            rcvTimestamp = max(1, sample.getTimestamp() - lastDataTimestamp)

        self._lastDataTimestamp = np.int64(sample.getTimestamp())
        self._lastRcvTimestamp = rcvTimestamp
        # append the new data to the existing HDF5 dataset
        s.resize((s.shape[0] + 1, ))
        s[-1:] = (np.frombuffer(sample.getContent(),
                                dtype=np.uint8), sample.getDatatype(),
                  np.int64(sample.getTimestamp()), rcvTimestamp)
        self._currentFile.flush()

        # status update once each second
        if (rcvTimestamp // 1000000) != (lastRcvTimestamp // 1000000):
            if hasattr(os, "posix_fadvise") and self.propertyCollection(
            ).getProperty("use_posix_fadvise_if_available"):
                os.posix_fadvise(self._currentFile.id.get_vfd_handle(), 0,
                                 self._currentFile.id.get_filesize(),
                                 os.POSIX_FADV_DONTNEED)
            self.statusUpdate.emit(self._name, rcvTimestamp * 1e-6,
                                   self._currentFile.id.get_filesize())
Example #30
 def _sha1file(self, node):
     """Checksum a single node (file)
     """
     if DEBUG:
         print("_sha1file({})".format(node))
     try:
         starttime = time.time()
         with open(node['Path'], 'rb') as f:
             # Caching:
             # This risks filling caches with what we're reading here, displacing potentially higher value items.
             #
             # To work this out, trials of the FADV options were made. The lowest-cache option was:
             #   * on opening the file POSIX_FADV_NOREUSE
             #   * before closing the file os.POSIX_FADV_DONTNEED
             #
             # Differences seem small, but this is nonetheless consistently the lowest-cache option across 3 tests
             #
             # See:
             #   man 2 posix_fadvise
             #   https://stackoverflow.com/questions/15266115/read-file-without-disk-caching-in-linux
             os.posix_fadvise(f.fileno(), 0, 0, os.POSIX_FADV_NOREUSE)
             sha = hashlib.sha1()
             data = b' '  # we start with something as the "last read" to ensure the loop starts
             blockcount = 0
             while data:
                 data = f.read(self.block_size)
                 sha.update(data)
                 blockcount += 1
                 # check on progress
                 if blockcount >= self.block_burst:
                     now = time.time()
                     delay = self.burst_time - (now - starttime)
                     if delay > 0.0:
                         time.sleep(delay)
                         starttime += self.burst_time
                     else:
                         # we're slipping - keep slipping
                         starttime = now
                     blockcount = 0
             os.posix_fadvise(f.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)
             f.close()
             return sha.hexdigest()
     except FileNotFoundError:
         return None
Example #31
    def _read(self, block: DereferencedBlock) -> Tuple[DereferencedBlock, bytes]:
        offset = block.id * self._block_size
        t1 = time.time()
        with open(self._path, 'rb') as f:
            f.seek(offset)
            data = f.read(block.size)
            os.posix_fadvise(f.fileno(), offset, block.size, os.POSIX_FADV_DONTNEED)
        t2 = time.time()

        if not data:
            raise EOFError('End of file reached on {} when there should be data.'.format(self.url))

        logger.debug('{} read block {} in {:.2f}s'.format(
            threading.current_thread().name,
            block.id,
            t2 - t1,
        ))

        return block, data
Example #32
def hexdigest_file(path: PathLike, algorithm: str) -> str:
    """Return the hexdigest of the file at `path` using `algorithm`

    Will stream the contents of file to the hash `algorithm` and
    return the hexdigest. If the specified `algorithm` is not
    supported a `ValueError` will be raised.
    """
    hasher = hashlib.new(algorithm)

    with open(path, "rb") as f:

        os.posix_fadvise(f.fileno(), 0, 0, os.POSIX_FADV_SEQUENTIAL)

        while True:
            data = f.read(BLOCKSIZE)
            if not data:
                break

            hasher.update(data)

    return hasher.hexdigest()
Example #33
 def __init__(self, pathname, writeable = False):
     '''
     Constructor.
     
     @param  pathname:str    The pathname of the file to use.
     @param  writeable:bool  Should the file be open for writing too?
     '''
     INTSIZE = 20
     
     self.pathname = pathname
     self.fd = os.open(pathname, os.O_RDWR if writeable else os.O_RDONLY)
     try:
         os.posix_fadvise(self.fd, 0, 0, os.POSIX_FADV_RANDOM)
     except:
         pass
     self.width    = int(self.__read(INTSIZE, INTSIZE * 0).decode('utf-8', 'strict'))
     self.items    = int(self.__read(INTSIZE, INTSIZE * 1).decode('utf-8', 'strict'))
     self.size     = int(self.__read(INTSIZE, INTSIZE * 2).decode('utf-8', 'strict'))
     self.removed  = int(self.__read(INTSIZE, INTSIZE * 3).decode('utf-8', 'strict'))
     self.offset = INTSIZE * 4
     self.xwidth = self.width + INTSIZE * 2
Example #34
    def __init__(self, file, prune, num_docs, vocab_size, in_memory, gpu):
        self.file = file = open(file, 'rb')
        mmp = mmap.mmap(file.fileno(),
                        0,
                        flags=mmap.MAP_PRIVATE,
                        prot=mmap.PROT_READ)
        if in_memory:
            # file will be read in full sequentially
            os.posix_fadvise(file.fileno(), 0, 0, os.POSIX_FADV_SEQUENTIAL)
        else:
            # file will be read randomly as needed
            os.posix_fadvise(file.fileno(), 0, 0, os.POSIX_FADV_RANDOM)
        if prune != 0:
            S_DTYPE = 2
            if in_memory:
                mmp = np.empty((num_docs, prune), dtype='i2'), np.empty(
                    (num_docs, prune), dtype='f2')
                for did in logger.pbar(range(num_docs), desc='loading dvecs'):
                    try:
                        mmp[0][did] = np.frombuffer(file.read(prune * S_DTYPE),
                                                    dtype='i2')
                        mmp[1][did] = np.frombuffer(file.read(prune * S_DTYPE),
                                                    dtype='f2')
                    except ValueError:
                        pass
                file.close()
            self.lookup = self.dvec_lookup_pruned
        else:
            if in_memory:
                mmp = file.read()
                file.close()
            self.lookup = self.dvec_lookup_unpruned

        self.prune = prune
        self.num_docs = num_docs
        self.vocab_size = vocab_size
        self.mmp = mmp
        self.gpu = gpu
        self.in_memory = in_memory
Example #35
    def __getitem__(self, index):
        # if not isinstance(index, int): index = np.sort(index)
        if self.reopen_mem_map:
            self.event_data = np.memmap(self.path,
                                        mode="r",
                                        shape=self.shape,
                                        offset=self.offset,
                                        dtype=self.dtype)

        self.e = np.array(self.event_data[index, :, :, :19])
        # self.event_data.read_direct(self.e,source_sel=np.s_[index,:,:,:19],dest_sel=np.s_[:])

        if self.fadvise == 'file':
            os.posix_fadvise(self.fd.fileno(), 0, self.f.id.get_filesize(),
                             os.POSIX_FADV_DONTNEED)
        elif self.fadvise == 'dataset':
            os.posix_fadvise(self.fd.fileno(),
                             self.hdf5_event_data.id.get_offset(),
                             self.hdf5_event_data.id.get_storage_size(),
                             os.POSIX_FADV_DONTNEED)

        return self.e
Example #36
    def _read(self, block):
        with open(self.io_name, 'rb') as source_file:
            offset = block.id * self._block_size
            t1 = time.time()
            source_file.seek(offset)
            data = source_file.read(block.size)
            t2 = time.time()
            # throw away cache
            os.posix_fadvise(source_file.fileno(), offset, block.size, os.POSIX_FADV_DONTNEED)

        if not data:
            raise EOFError('EOF reached on source when there should be data.')

        data_checksum = data_hexdigest(self._hash_function, data)

        logger.debug('{} read block {} (checksum {}...) in {:.2f}s'.format(
            threading.current_thread().name,
            block.id,
            data_checksum[:16],
            t2 - t1,
        ))

        return block, data, data_checksum
Example #37
 def random(self, offset, size):
     posix_fadvise(self.fd, offset, size, POSIX_FADV_RANDOM)
Example #38
 def normal(self, offset, size):
     posix_fadvise(self.fd, offset, size, POSIX_FADV_NORMAL)
Example #39
 def __call__(self, offset, size, willneed):
     if willneed:
         posix_fadvise(self.fd, offset, size, POSIX_FADV_WILLNEED)
     else:
         posix_fadvise(self.fd, offset, size, POSIX_FADV_DONTNEED)
Example #40
#!/usr/bin/env python3
import os
import mmap
import sys
if sys.version_info[:2] < (3,3):
    # native posix_fadvise introduced in 3.3, can shim in with ctypes:
    import ctypes
    libc = ctypes.CDLL("libc.so.6")
    os.posix_fadvise = libc.posix_fadvise
    os.POSIX_FADV_NORMAL     = 0
    os.POSIX_FADV_RANDOM     = 1
    os.POSIX_FADV_SEQUENTIAL = 2
    os.POSIX_FADV_WILLNEED   = 3
    os.POSIX_FADV_DONTNEED   = 4
    os.POSIX_FADV_NOREUSE    = 5

if __name__ == "__main__":
    import sys
    with open(sys.argv[1],"rb") as InF:
        os.posix_fadvise(InF.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)
        print(InF.read().split(b"\0",1)[0].decode())
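
One caveat with the ctypes shim above: without declared argtypes, ctypes passes Python ints as plain C ints, so offsets or lengths beyond 2 GiB overflow. A slightly safer variant of the shim (a sketch assuming a 64-bit Linux glibc where off_t is 64 bits wide; not part of the original) declares the C signature first:

import ctypes
import os

libc = ctypes.CDLL("libc.so.6", use_errno=True)
# int posix_fadvise(int fd, off_t offset, off_t len, int advice);
libc.posix_fadvise.argtypes = (ctypes.c_int, ctypes.c_int64, ctypes.c_int64, ctypes.c_int)
libc.posix_fadvise.restype = ctypes.c_int
os.posix_fadvise = libc.posix_fadvise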
Example #41
File: wiper.py Project: tyll/tools
# }}}

import argparse
import os
import sys
import time

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("target")
    args = parser.parse_args()

    fd = os.open(args.target, os.O_WRONLY)
    # POSIX_FADV_NOREUSE is a no-op
    # os.posix_fadvise(fd, 0, 0, os.POSIX_FADV_NOREUSE)
    os.posix_fadvise(fd, 0, 0, os.POSIX_FADV_DONTNEED)
    filesize = os.lseek(fd, 0, os.SEEK_END)
    os.lseek(fd, 0, os.SEEK_SET)

    bufsize = 512 * 4 * 1024
    buf = bytearray([0] * bufsize)
    allwritten = 0
    remainingdata = filesize
    filesize_mib = int(filesize / 2**20)

    start = time.time()
    laststatus = start
    buffered = 0
    try:
        while remainingdata != 0:
            remainingdata = filesize - allwritten
Example #42
 def fadvise_sequential(descriptor):
     """ Try to advise the kernel to read from 'descriptor' sequentially. """
     try:
         posix_fadvise(descriptor.fileno(), 0, 0, POSIX_FADV_SEQUENTIAL)
     except:
         pass
Example #43
def create(path,
           archiveList,
           xFilesFactor=None,
           aggregationMethod=None,
           sparse=False,
           useFallocate=False):
    """create(path,archiveList,xFilesFactor=0.5,aggregationMethod='average')

  path               is a string
  archiveList        is a list of archives, each of which is of the form
                     (secondsPerPoint, numberOfPoints)
  xFilesFactor       specifies the fraction of data points in a propagation interval
                     that must have known values for a propagation to occur
  aggregationMethod  specifies the function to use when propagating data (see
                     ``whisper.aggregationMethods``)
  """
    # Set default params
    if xFilesFactor is None:
        xFilesFactor = 0.5
    if aggregationMethod is None:
        aggregationMethod = 'average'

    # Validate archive configurations...
    validateArchiveList(archiveList)

    # Looks good, now we create the file and write the header
    if os.path.exists(path):
        raise InvalidConfiguration("File %s already exists!" % path)

    with open(path, 'wb', BUFFERING) as fh:
        try:
            if LOCK:
                fcntl.flock(fh.fileno(), fcntl.LOCK_EX)
            if CAN_FADVISE and FADVISE_RANDOM:
                posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM)

            oldest = max([
                secondsPerPoint * points
                for secondsPerPoint, points in archiveList
            ])

            __writeHeaderMetadata(fh, aggregationMethod, oldest, xFilesFactor,
                                  len(archiveList))

            headerSize = metadataSize + (archiveInfoSize * len(archiveList))
            archiveOffsetPointer = headerSize

            for secondsPerPoint, points in archiveList:
                archiveInfo = struct.pack(archiveInfoFormat,
                                          archiveOffsetPointer,
                                          secondsPerPoint, points)
                fh.write(archiveInfo)
                archiveOffsetPointer += (points * pointSize)

            # If configured to use fallocate and fallocate is available, use that; otherwise
            # attempt a sparse file if configured, or zero pre-allocate if sparse isn't configured.
            if CAN_FALLOCATE and useFallocate:
                remaining = archiveOffsetPointer - headerSize
                fallocate(fh, headerSize, remaining)
            elif sparse:
                fh.seek(archiveOffsetPointer - 1)
                fh.write(b'\x00')
            else:
                remaining = archiveOffsetPointer - headerSize
                chunksize = 16384
                zeroes = b'\x00' * chunksize
                while remaining > chunksize:
                    fh.write(zeroes)
                    remaining -= chunksize
                fh.write(zeroes[:remaining])

            if AUTOFLUSH:
                fh.flush()
                os.fsync(fh.fileno())
            # Explicitly close the file to catch IOError on close()
            fh.close()
        except IOError:
            # if we got an IOError above, the file is either empty or half created.
            # Better off deleting it to avoid surprises later
            os.unlink(fh.name)
            raise
Example #44
#!/usr/bin/env python3
import os
import mmap
import sys
if sys.version_info[:2] < (3, 3):
    # native posix_fadvise introduced in 3.3, can shim in with ctypes:
    import ctypes
    libc = ctypes.CDLL("libc.so.6")
    os.posix_fadvise = libc.posix_fadvise
    os.POSIX_FADV_NORMAL = 0
    os.POSIX_FADV_RANDOM = 1
    os.POSIX_FADV_SEQUENTIAL = 2
    os.POSIX_FADV_WILLNEED = 3
    os.POSIX_FADV_DONTNEED = 4
    os.POSIX_FADV_NOREUSE = 5

if __name__ == "__main__":
    import sys
    with open(sys.argv[1], "rb") as InF:
        os.posix_fadvise(InF.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)
        print(InF.read().split(b"\0", 1)[0].decode())
Example #45
 def close_fd(self, fd):
     if hasattr(os, 'posix_fadvise'):  # only on UNIX
         os.posix_fadvise(fd.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)
     fd.close()
Example #46
 def sequential(self, offset, size):
     posix_fadvise(self.fd, offset, size, POSIX_FADV_SEQUENTIAL)
Example #47
 def noreuse(self, offset, size):
     posix_fadvise(self.fd, offset, size, POSIX_FADV_NOREUSE)
Example #48
import hashlib
import os   # used below for the posix_fadvise feature detection
import sys  # used below for the platform check
from math import ceil
import subprocess
import time
from datetime import timedelta

SAME = b"0"
DIFF = b"1"
COMPLEN = len(SAME)  # SAME/DIFF length

LOCAL_FADVISE = 1
REMOTE_FADVISE = 2

if callable(getattr(os, "posix_fadvise", False)):
    from os import posix_fadvise, POSIX_FADV_NOREUSE, POSIX_FADV_DONTNEED
    fadvise = lambda fileobj, offset, length, advice: posix_fadvise(fileobj.fileno(), offset, length, advice)
else:
    try:
        from fadvise import set_advice, POSIX_FADV_NOREUSE, POSIX_FADV_DONTNEED
        fadvise = lambda fileobj, offset, length, advice: set_advice(fileobj, advice, offset, length)
    except:
        fadvise = None

if fadvise:
    USE_DONTNEED = sys.platform.startswith('linux')
    USE_NOREUSE = not(USE_DONTNEED)
else:
    USE_NOREUSE = USE_DONTNEED = False

def do_create(f, size):
    f = open(f, 'a', 0)
Example #49
def uncache(path):
    fd = os.open(path, os.O_RDWR)
    os.fdatasync(fd)
    os.posix_fadvise(fd, 0, 0, os.POSIX_FADV_DONTNEED)
    os.close(fd)
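
uncache() opens the path read-write, so the caller needs write permission; the fdatasync matters because POSIX_FADV_DONTNEED does not evict dirty pages, so the data is flushed to disk before the cache hint is given. A one-line usage sketch (the path is an illustrative assumption):

uncache("/var/tmp/just-written.db")    # hypothetical file whose pages we no longer want cached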
Example #50
 def os_access_hint(file_obj):
     flags = POSIX_FADV_SEQUENTIAL | POSIX_FADV_WILLNEED
     posix_fadvise(file_obj.fileno(), 0, 0, flags)
Example #51
import hashlib
import os   # used below for the posix_fadvise feature detection
import sys  # used below for the platform check
from math import ceil
import subprocess
import time
from datetime import timedelta

SAME = b"0"
DIFF = b"1"
COMPLEN = len(SAME)  # SAME/DIFF length

LOCAL_FADVISE = 1
REMOTE_FADVISE = 2

if callable(getattr(os, "posix_fadvise", False)):
    from os import posix_fadvise, POSIX_FADV_NOREUSE, POSIX_FADV_DONTNEED
    fadvise = lambda fileobj, offset, length, advice: posix_fadvise(fileobj.fileno(), offset, length, advice)
else:
    try:
        from fadvise import set_advice, POSIX_FADV_NOREUSE, POSIX_FADV_DONTNEED
        fadvise = lambda fileobj, offset, length, advice: set_advice(fileobj, advice, offset, length)
    except:
        fadvise = None

if fadvise:
    USE_DONTNEED = sys.platform.startswith('linux')
    USE_NOREUSE = not(USE_DONTNEED)
else:
    USE_NOREUSE = USE_DONTNEED = False

def do_create(f, size):
    f = open(f, 'a', 0)
Example #52
def create(path, archiveList, xFilesFactor=None, aggregationMethod=None,
           sparse=False, useFallocate=False):
  """create(path,archiveList,xFilesFactor=0.5,aggregationMethod='average')

  path               is a string
  archiveList        is a list of archives, each of which is of the form
                     (secondsPerPoint, numberOfPoints)
  xFilesFactor       specifies the fraction of data points in a propagation interval
                     that must have known values for a propagation to occur
  aggregationMethod  specifies the function to use when propagating data (see
                     ``whisper.aggregationMethods``)
  """
  # Set default params
  if xFilesFactor is None:
    xFilesFactor = 0.5
  if aggregationMethod is None:
    aggregationMethod = 'average'

  # Validate archive configurations...
  validateArchiveList(archiveList)

  # Looks good, now we create the file and write the header
  if os.path.exists(path):
    raise InvalidConfiguration("File %s already exists!" % path)

  with open(path, 'wb', BUFFERING) as fh:
    try:
      if LOCK:
        fcntl.flock(fh.fileno(), fcntl.LOCK_EX)
      if CAN_FADVISE and FADVISE_RANDOM:
        posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM)

      oldest = max([secondsPerPoint * points for secondsPerPoint, points in archiveList])

      __writeHeaderMetadata(fh, aggregationMethod, oldest, xFilesFactor,
                            len(archiveList))

      headerSize = metadataSize + (archiveInfoSize * len(archiveList))
      archiveOffsetPointer = headerSize

      for secondsPerPoint, points in archiveList:
        archiveInfo = struct.pack(archiveInfoFormat, archiveOffsetPointer, secondsPerPoint, points)
        fh.write(archiveInfo)
        archiveOffsetPointer += (points * pointSize)

      # If configured to use fallocate and fallocate is available, use that; otherwise
      # attempt a sparse file if configured, or zero pre-allocate if sparse isn't configured.
      if CAN_FALLOCATE and useFallocate:
        remaining = archiveOffsetPointer - headerSize
        fallocate(fh, headerSize, remaining)
      elif sparse:
        fh.seek(archiveOffsetPointer - 1)
        fh.write(b'\x00')
      else:
        remaining = archiveOffsetPointer - headerSize
        chunksize = 16384
        zeroes = b'\x00' * chunksize
        while remaining > chunksize:
          fh.write(zeroes)
          remaining -= chunksize
        fh.write(zeroes[:remaining])

      if AUTOFLUSH:
        fh.flush()
        os.fsync(fh.fileno())
      # Explicitly close the file to catch IOError on close()
      fh.close()
    except IOError:
      # if we got an IOError above, the file is either empty or half created.
      # Better off deleting it to avoid surprises later
      os.unlink(fh.name)
      raise