Beispiel #1
0
Datei: midx.py Projekt: vipup/bup
    def __init__(self, filename):
        """Map the midx file `filename` and locate the tables inside it.

        The file is read as: a 4-byte 'MIDX' magic, a 4-byte big-endian
        version, a 4-byte word passed to _helpers.firstword() to obtain
        `bits`, a fanout table of 2**bits 4-byte entries, a table of
        20-byte entries (shatable), a table of 4-byte entries (whichlist)
        and finally a NUL-separated list of idx names.

        When the magic is wrong or the version differs from MIDX_VERSION,
        a warning is logged, self.force_keep is set and the result of
        self._init_failed() is returned without setting up any table.
        """
        self.name = filename
        self.force_keep = False
        # Bind self.map before anything below can raise.
        self.map = None
        assert filename.endswith('.midx')
        self.map = mmap_read(open(filename))

        magic = str(self.map[0:4])
        if magic != 'MIDX':
            log('Warning: skipping: invalid MIDX header in %r\n' % filename)
            self.force_keep = True
            return self._init_failed()

        (version,) = struct.unpack('!I', self.map[4:8])
        if version != MIDX_VERSION:
            if version < MIDX_VERSION:
                log('Warning: ignoring old-style (v%d) midx %r\n'
                    % (version, filename))
                self.force_keep = False  # old stuff is boring
            else:
                log('Warning: ignoring too-new (v%d) midx %r\n'
                    % (version, filename))
                self.force_keep = True  # new stuff is exciting
            return self._init_failed()

        self.bits = _helpers.firstword(self.map[8:12])
        self.entries = 2 ** self.bits

        # The fanout table starts right after the 12 header bytes.
        fanout_len = self.entries * 4
        self.fanout = buffer(self.map, 12, fanout_len)
        self.sha_ofs = 12 + fanout_len

        # The last fanout entry gives the number of rows in the two
        # tables that follow.
        nsha = self._fanget(self.entries - 1)
        self.nsha = nsha
        sha_len = nsha * 20
        which_len = nsha * 4
        self.shatable = buffer(self.map, self.sha_ofs, sha_len)
        self.which_ofs = self.sha_ofs + sha_len
        self.whichlist = buffer(self.map, self.which_ofs, which_len)

        # Everything after the which-list is the NUL-separated name list.
        names_ofs = self.which_ofs + which_len
        self.idxnames = str(self.map[names_ofs:]).split('\0')
Beispiel #2
0
Datei: midx.py Projekt: bup/bup
    def __init__(self, filename):
        """Open and memory-map a .midx file, then index its sections.

        Layout as read here: 'MIDX' magic (4 bytes), big-endian version
        (4 bytes), a 4-byte word decoded by _helpers.firstword() into
        `bits`, 2**bits fanout entries of 4 bytes, nsha entries of 20
        bytes (shatable), nsha entries of 4 bytes (whichlist), and a
        trailing NUL-separated list of idx names.

        A bad magic or a version other than MIDX_VERSION logs a warning,
        updates self.force_keep and returns self._init_failed() early.
        """
        self.name = filename
        self.force_keep = False
        # Bind self.map before anything below can raise.
        self.map = None
        assert filename.endswith('.midx')
        self.map = mmap_read(open(filename))

        header_magic = str(self.map[0:4])
        if header_magic != 'MIDX':
            log('Warning: skipping: invalid MIDX header in %r\n' % filename)
            self.force_keep = True
            return self._init_failed()

        (file_ver,) = struct.unpack('!I', self.map[4:8])
        if file_ver != MIDX_VERSION:
            if file_ver < MIDX_VERSION:
                log('Warning: ignoring old-style (v%d) midx %r\n'
                    % (file_ver, filename))
                self.force_keep = False  # old stuff is boring
            else:
                log('Warning: ignoring too-new (v%d) midx %r\n'
                    % (file_ver, filename))
                self.force_keep = True  # new stuff is exciting
            return self._init_failed()

        self.bits = _helpers.firstword(self.map[8:12])
        self.entries = 2 ** self.bits

        # Fanout: self.entries words of 4 bytes, directly after the header.
        fanout_size = self.entries * 4
        self.fanout = buffer(self.map, 12, fanout_size)
        self.sha_ofs = 12 + fanout_size

        # The final fanout slot holds the row count of the later tables.
        self.nsha = nsha = self._fanget(self.entries - 1)
        shatable_size = nsha * 20
        whichlist_size = nsha * 4
        self.shatable = buffer(self.map, self.sha_ofs, shatable_size)
        self.which_ofs = self.sha_ofs + shatable_size
        self.whichlist = buffer(self.map, self.which_ofs, whichlist_size)

        # The remainder of the map is the NUL-separated idx name list.
        tail = self.map[self.which_ofs + whichlist_size:]
        self.idxnames = str(tail).split('\0')
Beispiel #3
0
    def __init__(self, filename):
        """Map the midx file `filename` (bytes path) and record its offsets.

        Layout as read here: b'MIDX' magic (4 bytes), big-endian version
        (4 bytes), a 4-byte word decoded by _helpers.firstword() into
        `bits`, a fanout table of 2**bits 4-byte entries, nsha 20-byte
        entries, nsha 4-byte entries, then a NUL-separated list of idx
        names.  Only offsets are stored for the tables; no sub-buffers
        are created.

        A bad magic or a version other than MIDX_VERSION logs a warning,
        updates self.force_keep and returns self._init_failed() early.
        """
        self.name = filename
        self.force_keep = False
        # Bind self.map before anything below can raise.
        self.map = None
        assert filename.endswith(b'.midx')
        self.map = mmap_read(open(filename))

        magic = self.map[0:4]
        if magic != b'MIDX':
            log('Warning: skipping: invalid MIDX header in %r\n'
                % path_msg(filename))
            self.force_keep = True
            return self._init_failed()

        (version,) = struct.unpack('!I', self.map[4:8])
        if version != MIDX_VERSION:
            if version < MIDX_VERSION:
                log('Warning: ignoring old-style (v%d) midx %r\n'
                    % (version, path_msg(filename)))
                self.force_keep = False  # old stuff is boring
            else:
                log('Warning: ignoring too-new (v%d) midx %r\n'
                    % (version, path_msg(filename)))
                self.force_keep = True  # new stuff is exciting
            return self._init_failed()

        self.bits = _helpers.firstword(self.map[8:12])
        self.entries = 2 ** self.bits

        # Fanout table: self.entries 4-byte words after the 12-byte header.
        self.fanout_ofs = 12
        fanout_len = self.entries * 4
        self.sha_ofs = self.fanout_ofs + fanout_len

        # The last fanout entry is the row count of the following tables.
        self.nsha = self._fanget(self.entries - 1)
        sha_len = 20 * self.nsha
        self.which_ofs = self.sha_ofs + sha_len
        which_len = 4 * self.nsha

        # Whatever follows the which table is the NUL-separated name list.
        names_blob = self.map[self.which_ofs + which_len:]
        self.idxnames = names_blob.split(b'\0')
Beispiel #4
0
    def __init__(self, filename, f=None, readwrite=False, expected=-1):
        """Map the bloom file `filename` and parse its header.

        filename  -- bytes path; must end in b'.bloom'.
        f         -- optional already-open file used in place of opening
                     `filename`.
        readwrite -- when true, map the file writable (opened 'r+b' unless
                     `f` is given); otherwise map it read-only.
        expected  -- must be > 0 when `readwrite` is true; compared with a
                     page-count estimate to choose the kind of writable
                     mapping.

        The header is a b'BLOM' magic, a big-endian 4-byte version and a
        '!HHI' triple (bits, k, entries).  Whatever follows the first
        16 + 2**bits bytes is a NUL-separated list of idx names.  A bad
        magic or a version other than BLOOM_VERSION logs a warning, calls
        self._init_failed() and returns.
        """
        self.closed = False
        self.name = filename
        self.readwrite = readwrite
        self.file = None
        self.map = None
        assert filename.endswith(b'.bloom')

        if not readwrite:
            self.file = f or open(filename, 'rb')
            self.map = mmap_read(self.file)
        else:
            assert expected > 0
            self.file = f = f or open(filename, 'r+b')
            f.seek(0)

            # Two ways to map the table for writing: shared ('immediate'
            # write) or private, written back later ('delayed' write).
            # Adding even a few entries dirties nearly every page of a
            # bloom table, and a table that large tends to exceed Linux's
            # default /proc/sys/vm/dirty_ratio or
            # /proc/sys/vm/dirty_background_ratio, so the kernel starts
            # flushing before we are finished even though the table would
            # fit in RAM.
            #
            # So when we expect to touch the whole table anyway (at least
            # one bit flipped per memory page), take a "private" mapping,
            # which defeats Linux's ability to flush it to disk, and flush
            # it as one big lump during close().
            pages = os.fstat(f.fileno()).st_size // 4096 * 5  # assume k=5
            self.delaywrite = expected > pages
            debug1('bloom: delaywrite=%r\n' % self.delaywrite)
            mapper = (mmap_readwrite_private if self.delaywrite
                      else mmap_readwrite)
            self.map = mapper(self.file, close=False)

        got = self.map[0:4]
        if got != b'BLOM':
            log('Warning: invalid BLOM header (%r) in %r\n' % (got, filename))
            self._init_failed()
            return

        (ver,) = struct.unpack('!I', self.map[4:8])
        if ver != BLOOM_VERSION:
            if ver < BLOOM_VERSION:
                log('Warning: ignoring old-style (v%d) bloom %r\n'
                    % (ver, filename))
            else:
                log('Warning: ignoring too-new (v%d) bloom %r\n'
                    % (ver, filename))
            self._init_failed()
            return

        header = struct.unpack('!HHI', self.map[8:16])
        self.bits, self.k, self.entries = header

        # An empty tail means no names at all, not one empty name.
        idxnamestr = self.map[16 + 2**self.bits:]
        self.idxnames = idxnamestr.split(b'\0') if idxnamestr else []
0
Datei: bloom.py Projekt: bup/bup
    def __init__(self, filename, f=None, readwrite=False, expected=-1):
        """Map the bloom file `filename` and parse its header.

        filename  -- path; must end in '.bloom'.
        f         -- optional already-open file used in place of opening
                     `filename`.
        readwrite -- when true, map the file writable (opened 'r+b' unless
                     `f` is given) and remember it in self.rwfile;
                     otherwise map it read-only and leave self.rwfile None.
        expected  -- must be > 0 when `readwrite` is true; compared with a
                     page-count estimate to choose the kind of writable
                     mapping.

        The header is a 'BLOM' magic, a big-endian 4-byte version and a
        '!HHI' triple (bits, k, entries).  Whatever follows the first
        16 + 2**bits bytes is a NUL-separated list of idx names.  A bad
        magic or a version other than BLOOM_VERSION logs a warning and
        returns the result of self._init_failed().
        """
        self.name = filename
        self.rwfile = None
        self.map = None
        assert filename.endswith('.bloom')

        if not readwrite:
            f = f or open(filename, 'rb')
            self.map = mmap_read(f)
        else:
            assert expected > 0
            self.rwfile = f = f or open(filename, 'r+b')
            f.seek(0)

            # Two ways to map the table for writing: shared ('immediate'
            # write) or private, written back later ('delayed' write).
            # Adding even a few entries dirties nearly every page of a
            # bloom table, and a table that large tends to exceed Linux's
            # default /proc/sys/vm/dirty_ratio or
            # /proc/sys/vm/dirty_background_ratio, so the kernel starts
            # flushing before we are finished even though the table would
            # fit in RAM.
            #
            # So when we expect to touch the whole table anyway (at least
            # one bit flipped per memory page), take a "private" mapping,
            # which defeats Linux's ability to flush it to disk, and flush
            # it as one big lump during close().
            pages = os.fstat(f.fileno()).st_size / 4096 * 5  # assume k=5
            self.delaywrite = expected > pages
            debug1('bloom: delaywrite=%r\n' % self.delaywrite)
            if self.delaywrite:
                mapper = mmap_readwrite_private
            else:
                mapper = mmap_readwrite
            self.map = mapper(self.rwfile, close=False)

        got = str(self.map[0:4])
        if got != 'BLOM':
            log('Warning: invalid BLOM header (%r) in %r\n' % (got, filename))
            return self._init_failed()

        (ver,) = struct.unpack('!I', self.map[4:8])
        if ver != BLOOM_VERSION:
            if ver < BLOOM_VERSION:
                log('Warning: ignoring old-style (v%d) bloom %r\n'
                    % (ver, filename))
            else:
                log('Warning: ignoring too-new (v%d) bloom %r\n'
                    % (ver, filename))
            return self._init_failed()

        header = struct.unpack('!HHI', self.map[8:16])
        self.bits, self.k, self.entries = header

        # An empty tail means no names at all, not one empty name.
        idxnamestr = str(self.map[16 + 2**self.bits:])
        self.idxnames = idxnamestr.split('\0') if idxnamestr else []
Beispiel #6
0
 def __init__(self, filename, f):
     """Map the open index file `f` and expose its fanout and entry table.

     The map starts with 256 big-endian 4-byte fanout counters; the
     entry table (24 bytes per entry) follows immediately.  `filename`
     is kept as self.name and as the only element of self.idxnames.
     """
     fanout_size = 256 * 4
     self.name = filename
     self.idxnames = [self.name]
     self.map = mmap_read(f)

     counts = list(struct.unpack('!256I',
                                 buffer(self.map, 0, fanout_size)))
     # Extra trailing zero so that index -1 reads as 0 (entry "-1").
     counts.append(0)
     self.fanout = counts

     # Counter 255 is the total number of entries in the table.
     total = counts[255]
     self.sha_ofs = fanout_size
     self.shatable = buffer(self.map, self.sha_ofs, total * 24)
Beispiel #7
0
 def __init__(self, filename, f):
     """Map the open index file `f` and expose its fanout and entry table.

     The first 256 * 4 bytes of the map are unpacked as 256 big-endian
     4-byte fanout counters; the entry table (24 bytes per entry) starts
     right after them.  `filename` is kept as self.name and as the only
     element of self.idxnames.
     """
     self.name = filename
     self.idxnames = [self.name]
     self.map = mmap_read(f)

     # Copy the fanout bytes into a str before handing them to struct.
     raw_fanout = str(buffer(self.map, 0, 256 * 4))
     fanout = list(struct.unpack('!256I', raw_fanout))
     # Extra trailing zero so that index -1 reads as 0 (entry "-1").
     fanout.append(0)
     self.fanout = fanout

     # Counter 255 is the total number of entries in the table.
     entry_count = fanout[255]
     self.sha_ofs = 256 * 4
     self.shatable = buffer(self.map, self.sha_ofs, entry_count * 24)
Beispiel #8
0
 def __init__(self, filename, f):
     """Map the open index file `f` and expose its fanout and entry table.

     The map starts with 256 big-endian 4-byte fanout counters; the
     entry table (24 bytes per entry) follows immediately.  `filename`
     is kept as self.name and as the only element of self.idxnames.
     """
     self.name = filename
     self.idxnames = [self.name]
     self.map = mmap_read(f)
     # Min size for 'L' is 4, which is sufficient for struct's '!I'.
     # unpack_from() reads just the leading 1024 bytes; struct.unpack()
     # would raise struct.error here because it requires the buffer to
     # be exactly as long as the format, and the map also holds the
     # entry table that follows the fanout.
     self.fanout = array('L',
                         struct.unpack_from('!256I', self.map, offset=0))
     self.fanout.append(0)  # entry "-1": makes fanout[-1] read as 0
     # Counter 255 is the total number of entries in the table.
     self.nsha = self.fanout[255]
     self.sha_ofs = 256 * 4
     # Avoid slicing shatable for individual hashes (very high overhead)
     self.shatable = buffer(self.map, self.sha_ofs, self.nsha * 24)
Beispiel #9
0
 def __init__(self, filename, f):
     """Map the open index file `f` and record where its tables start.

     After the 8-byte header b'\\377tOc\\0\\0\\0\\2' come 256 big-endian
     4-byte fanout counters, then a table of 20-byte entries (shatable).
     self.ofstable_ofs is placed a further 4 bytes per entry past the
     end of that table, and self.ofs64table_ofs another 4 bytes per
     entry after that.  `filename` is kept as self.name and as the only
     element of self.idxnames.
     """
     self.name = filename
     self.idxnames = [self.name]
     self.map = mmap_read(f)
     assert self.map[0:8] == b'\377tOc\0\0\0\2'

     # Min size for 'L' is 4, which is sufficient for struct's '!I'
     fanout = array('L', struct.unpack_from('!256I', self.map, offset=8))
     # Extra trailing zero so that index -1 reads as 0.
     fanout.append(0)
     self.fanout = fanout

     # Counter 255 is the total number of entries in each table.
     count = fanout[255]
     self.nsha = count
     self.sha_ofs = 8 + 256 * 4

     sha_len = count * 20
     # NOTE(review): the 4-byte-per-entry table skipped here is presumably
     # the CRC table of git's v2 idx layout -- confirm against the format.
     skipped_len = count * 4
     self.ofstable_ofs = self.sha_ofs + sha_len + skipped_len
     self.ofs64table_ofs = self.ofstable_ofs + count * 4

     # Avoid slicing this for individual hashes (very high overhead)
     self.shatable = buffer(self.map, self.sha_ofs, sha_len)
Beispiel #10
0
 def __init__(self, filename, f):
     """Map the open index file `f` and expose its tables as buffers.

     After the 8-byte header '\\377tOc\\0\\0\\0\\2' come 256 big-endian
     4-byte fanout counters, then a table of 20-byte entries (shatable).
     self.ofstable (4 bytes per entry) begins a further 4 bytes per
     entry past the end of shatable, and self.ofs64table covers the
     rest of the map after ofstable.  `filename` is kept as self.name
     and as the only element of self.idxnames.
     """
     self.name = filename
     self.idxnames = [self.name]
     self.map = mmap_read(f)
     assert str(self.map[0:8]) == '\377tOc\0\0\0\2'

     # Copy the fanout bytes into a str before handing them to struct.
     raw_fanout = str(buffer(self.map, 8, 256 * 4))
     fanout = list(struct.unpack('!256I', raw_fanout))
     # Extra trailing zero so that index -1 reads as 0 (entry "-1").
     fanout.append(0)
     self.fanout = fanout

     # Counter 255 is the total number of entries in each table.
     nsha = fanout[255]
     self.sha_ofs = 8 + 256 * 4
     self.shatable = buffer(self.map, self.sha_ofs, nsha * 20)

     # Skip shatable plus one more 4-byte-per-entry table.
     ofstable_start = self.sha_ofs + nsha * 20 + nsha * 4
     self.ofstable = buffer(self.map, ofstable_start, nsha * 4)
     # No length given: ofs64table runs to the end of the map.
     self.ofs64table = buffer(self.map, ofstable_start + nsha * 4)
Beispiel #11
0
 def __init__(self, filename, f):
     """Map the open index file `f` and expose its tables as buffers.

     Layout as read here: the 8-byte header '\\377tOc\\0\\0\\0\\2', 256
     big-endian 4-byte fanout counters, a table of 20-byte entries
     (shatable), a 4-byte-per-entry table that is skipped, a
     4-byte-per-entry table (ofstable), and the remainder of the map
     (ofs64table).  `filename` is kept as self.name and as the only
     element of self.idxnames.
     """
     header_len = 8
     fanout_len = 256 * 4

     self.name = filename
     self.idxnames = [self.name]
     self.map = mmap_read(f)
     assert str(self.map[0:8]) == '\377tOc\0\0\0\2'

     counts = list(struct.unpack(
         '!256I', str(buffer(self.map, header_len, fanout_len))))
     # Extra trailing zero so that index -1 reads as 0 (entry "-1").
     counts.append(0)
     self.fanout = counts

     # Counter 255 is the total number of entries in each table.
     nsha = counts[255]
     self.sha_ofs = header_len + fanout_len

     sha_len = nsha * 20
     word_len = nsha * 4
     self.shatable = buffer(self.map, self.sha_ofs, sha_len)
     ofs_start = self.sha_ofs + sha_len + word_len
     self.ofstable = buffer(self.map, ofs_start, word_len)
     # No length given: ofs64table runs to the end of the map.
     self.ofs64table = buffer(self.map, ofs_start + word_len)