Exemple #1
0
 def __init__(self, filename):
     self.filename = filename
     self.m = b''
     self.writable = False
     self.count = 0
     f = None
     try:
         f = open(filename, 'rb+')
     except IOError as e:
         if e.errno == errno.ENOENT:
             pass
         else:
             raise
     if f:
         b = f.read(len(INDEX_HDR))
         if b != INDEX_HDR:
             log('warning: %s: header: expected %r, got %r\n' %
                 (filename, INDEX_HDR, b))
         else:
             st = os.fstat(f.fileno())
             if st.st_size:
                 self.m = mmap_readwrite(f)
                 self.writable = True
                 self.count = struct.unpack(
                     FOOTER_SIG, self.m[st.st_size - FOOTLEN:st.st_size])[0]
Exemple #2
0
 def __init__(self, filename):
     self.filename = filename
     self.m = ''
     self.writable = False
     self.count = 0
     f = None
     try:
         f = open(filename, 'r+')
     except IOError as e:
         if e.errno == errno.ENOENT:
             pass
         else:
             raise
     if f:
         b = f.read(len(INDEX_HDR))
         if b != INDEX_HDR:
             log('warning: %s: header: expected %r, got %r\n'
                              % (filename, INDEX_HDR, b))
         else:
             st = os.fstat(f.fileno())
             if st.st_size:
                 self.m = mmap_readwrite(f)
                 self.writable = True
                 self.count = struct.unpack(FOOTER_SIG,
                       str(buffer(self.m, st.st_size-FOOTLEN, FOOTLEN)))[0]
Exemple #3
0
    def __init__(self, filename, f=None, readwrite=False, expected=-1):
        self.closed = False
        self.name = filename
        self.readwrite = readwrite
        self.file = None
        self.map = None
        assert (filename.endswith(b'.bloom'))
        if readwrite:
            assert (expected > 0)
            self.file = f = f or open(filename, 'r+b')
            f.seek(0)

            # Decide if we want to mmap() the pages as writable ('immediate'
            # write) or else map them privately for later writing back to
            # the file ('delayed' write).  A bloom table's write access
            # pattern is such that we dirty almost all the pages after adding
            # very few entries.  But the table is so big that dirtying
            # *all* the pages often exceeds Linux's default
            # /proc/sys/vm/dirty_ratio or /proc/sys/vm/dirty_background_ratio,
            # thus causing it to start flushing the table before we're
            # finished... even though there's more than enough space to
            # store the bloom table in RAM.
            #
            # To work around that behaviour, if we calculate that we'll
            # probably end up touching the whole table anyway (at least
            # one bit flipped per memory page), let's use a "private" mmap,
            # which defeats Linux's ability to flush it to disk.  Then we'll
            # flush it as one big lump during close().
            pages = os.fstat(f.fileno()).st_size // 4096 * 5  # assume k=5
            self.delaywrite = expected > pages
            debug1('bloom: delaywrite=%r\n' % self.delaywrite)
            if self.delaywrite:
                self.map = mmap_readwrite_private(self.file, close=False)
            else:
                self.map = mmap_readwrite(self.file, close=False)
        else:
            self.file = f or open(filename, 'rb')
            self.map = mmap_read(self.file)
        got = self.map[0:4]
        if got != b'BLOM':
            log('Warning: invalid BLOM header (%r) in %r\n' % (got, filename))
            self._init_failed()
            return
        ver = struct.unpack('!I', self.map[4:8])[0]
        if ver < BLOOM_VERSION:
            log('Warning: ignoring old-style (v%d) bloom %r\n' %
                (ver, filename))
            self._init_failed()
            return
        if ver > BLOOM_VERSION:
            log('Warning: ignoring too-new (v%d) bloom %r\n' % (ver, filename))
            self._init_failed()
            return

        self.bits, self.k, self.entries = struct.unpack('!HHI', self.map[8:16])
        idxnamestr = self.map[16 + 2**self.bits:]
        if idxnamestr:
            self.idxnames = idxnamestr.split(b'\0')
        else:
            self.idxnames = []
Exemple #4
0
Fichier : bloom.py Projet : bup/bup
    def __init__(self, filename, f=None, readwrite=False, expected=-1):
        self.name = filename
        self.rwfile = None
        self.map = None
        assert(filename.endswith('.bloom'))
        if readwrite:
            assert(expected > 0)
            self.rwfile = f = f or open(filename, 'r+b')
            f.seek(0)

            # Decide if we want to mmap() the pages as writable ('immediate'
            # write) or else map them privately for later writing back to
            # the file ('delayed' write).  A bloom table's write access
            # pattern is such that we dirty almost all the pages after adding
            # very few entries.  But the table is so big that dirtying
            # *all* the pages often exceeds Linux's default
            # /proc/sys/vm/dirty_ratio or /proc/sys/vm/dirty_background_ratio,
            # thus causing it to start flushing the table before we're
            # finished... even though there's more than enough space to
            # store the bloom table in RAM.
            #
            # To work around that behaviour, if we calculate that we'll
            # probably end up touching the whole table anyway (at least
            # one bit flipped per memory page), let's use a "private" mmap,
            # which defeats Linux's ability to flush it to disk.  Then we'll
            # flush it as one big lump during close().
            pages = os.fstat(f.fileno()).st_size / 4096 * 5 # assume k=5
            self.delaywrite = expected > pages
            debug1('bloom: delaywrite=%r\n' % self.delaywrite)
            if self.delaywrite:
                self.map = mmap_readwrite_private(self.rwfile, close=False)
            else:
                self.map = mmap_readwrite(self.rwfile, close=False)
        else:
            self.rwfile = None
            f = f or open(filename, 'rb')
            self.map = mmap_read(f)
        got = str(self.map[0:4])
        if got != 'BLOM':
            log('Warning: invalid BLOM header (%r) in %r\n' % (got, filename))
            return self._init_failed()
        ver = struct.unpack('!I', self.map[4:8])[0]
        if ver < BLOOM_VERSION:
            log('Warning: ignoring old-style (v%d) bloom %r\n' 
                % (ver, filename))
            return self._init_failed()
        if ver > BLOOM_VERSION:
            log('Warning: ignoring too-new (v%d) bloom %r\n'
                % (ver, filename))
            return self._init_failed()

        self.bits, self.k, self.entries = struct.unpack('!HHI', self.map[8:16])
        idxnamestr = str(self.map[16 + 2**self.bits:])
        if idxnamestr:
            self.idxnames = idxnamestr.split('\0')
        else:
            self.idxnames = []
Exemple #5
0
    def write(self, filename, packbin):
        ofs64_count = 0
        for section in self.idx:
            for entry in section:
                if entry[2] >= 2**31:
                    ofs64_count += 1

        # Length: header + fan-out + shas-and-crcs + overflow-offsets
        index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
        idx_map = None
        idx_f = open(filename, 'w+b')
        try:
            idx_f.truncate(index_len)
            fdatasync(idx_f.fileno())
            idx_map = mmap_readwrite(idx_f, close=False)
            try:
                count = _helpers.write_idx(filename, idx_map, self.idx,
                                           self.count)
                assert (count == self.count)
                idx_map.flush()
            finally:
                idx_map.close()
        finally:
            idx_f.close()

        idx_f = open(filename, 'a+b')
        try:
            idx_f.write(packbin)
            idx_f.seek(0)
            idx_sum = Sha1()
            b = idx_f.read(8 + 4 * 256)
            idx_sum.update(b)

            obj_list_sum = Sha1()
            for b in chunkyreader(idx_f, 20 * self.count):
                idx_sum.update(b)
                obj_list_sum.update(b)
            namebase = hexlify(obj_list_sum.digest())

            for b in chunkyreader(idx_f):
                idx_sum.update(b)
            idx_f.write(idx_sum.digest())
            fdatasync(idx_f.fileno())
            return namebase
        finally:
            idx_f.close()
Exemple #6
0
    def _write_pack_idx_v2(self, filename, idx, packbin):
        ofs64_count = 0
        for section in idx:
            for entry in section:
                if entry[2] >= 2**31:
                    ofs64_count += 1

        # Length: header + fan-out + shas-and-crcs + overflow-offsets
        index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
        idx_map = None
        idx_f = open(filename, 'w+b')
        try:
            idx_f.truncate(index_len)
            fdatasync(idx_f.fileno())
            idx_map = mmap_readwrite(idx_f, close=False)
            try:
                count = _helpers.write_idx(filename, idx_map, idx, self.count)
                assert(count == self.count)
                idx_map.flush()
            finally:
                idx_map.close()
        finally:
            idx_f.close()

        idx_f = open(filename, 'a+b')
        try:
            idx_f.write(packbin)
            idx_f.seek(0)
            idx_sum = Sha1()
            b = idx_f.read(8 + 4*256)
            idx_sum.update(b)

            obj_list_sum = Sha1()
            for b in chunkyreader(idx_f, 20*self.count):
                idx_sum.update(b)
                obj_list_sum.update(b)
            namebase = obj_list_sum.hexdigest()

            for b in chunkyreader(idx_f):
                idx_sum.update(b)
            idx_f.write(idx_sum.digest())
            fdatasync(idx_f.fileno())
            return namebase
        finally:
            idx_f.close()
Exemple #7
0
Fichier : midx.py Projet : bup/bup
def _do_midx(outdir, outfilename, infilenames, prefixstr,
             auto=False, force=False):
    global _first
    if not outfilename:
        assert(outdir)
        sum = hexlify(Sha1(b'\0'.join(infilenames)).digest())
        outfilename = b'%s/midx-%s.midx' % (outdir, sum)
    
    inp = []
    total = 0
    allfilenames = []
    midxs = []
    try:
        for name in infilenames:
            ix = git.open_idx(name)
            midxs.append(ix)
            inp.append((
                ix.map,
                len(ix),
                ix.sha_ofs,
                isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
                len(allfilenames),
            ))
            for n in ix.idxnames:
                allfilenames.append(os.path.basename(n))
            total += len(ix)
        inp.sort(reverse=True, key=lambda x: x[0][x[2] : x[2] + 20])

        if not _first: _first = outdir
        dirprefix = (_first != outdir) and git.repo_rel(outdir) + b': ' or b''
        debug1('midx: %s%screating from %d files (%d objects).\n'
               % (dirprefix, prefixstr, len(infilenames), total))
        if (auto and (total < 1024 and len(infilenames) < 3)) \
           or ((auto or force) and len(infilenames) < 2) \
           or (force and not total):
            debug1('midx: nothing to do.\n')
            return

        pages = int(total/SHA_PER_PAGE) or 1
        bits = int(math.ceil(math.log(pages, 2)))
        entries = 2**bits
        debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))

        unlink(outfilename)
        with atomically_replaced_file(outfilename, 'wb') as f:
            f.write(b'MIDX')
            f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
            assert(f.tell() == 12)

            f.truncate(12 + 4*entries + 20*total + 4*total)
            f.flush()
            fdatasync(f.fileno())

            fmap = mmap_readwrite(f, close=False)
            count = merge_into(fmap, bits, total, inp)
            del fmap # Assume this calls msync() now.
            f.seek(0, os.SEEK_END)
            f.write(b'\0'.join(allfilenames))
    finally:
        for ix in midxs:
            if isinstance(ix, midx.PackMidx):
                ix.close()
        midxs = None
        inp = None


    # This is just for testing (if you enable this, don't clear inp above)
    if 0:
        p = midx.PackMidx(outfilename)
        assert(len(p.idxnames) == len(infilenames))
        log(repr(p.idxnames) + '\n')
        assert(len(p) == total)
        for pe, e in p, git.idxmerge(inp, final_progress=False):
            pin = next(pi)
            assert(i == pin)
            assert(p.exists(i))

    return total, outfilename
Exemple #8
0
def _do_midx(outdir, outfilename, infilenames, prefixstr):
    global _first
    if not outfilename:
        assert(outdir)
        sum = Sha1('\0'.join(infilenames)).hexdigest()
        outfilename = '%s/midx-%s.midx' % (outdir, sum)
    
    inp = []
    total = 0
    allfilenames = []
    midxs = []
    try:
        for name in infilenames:
            ix = git.open_idx(name)
            midxs.append(ix)
            inp.append((
                ix.map,
                len(ix),
                ix.sha_ofs,
                isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
                len(allfilenames),
            ))
            for n in ix.idxnames:
                allfilenames.append(os.path.basename(n))
            total += len(ix)
        inp.sort(lambda x,y: cmp(str(y[0][y[2]:y[2]+20]),str(x[0][x[2]:x[2]+20])))

        if not _first: _first = outdir
        dirprefix = (_first != outdir) and git.repo_rel(outdir)+': ' or ''
        debug1('midx: %s%screating from %d files (%d objects).\n'
               % (dirprefix, prefixstr, len(infilenames), total))
        if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
           or ((opt.auto or opt.force) and len(infilenames) < 2) \
           or (opt.force and not total):
            debug1('midx: nothing to do.\n')
            return

        pages = int(total/SHA_PER_PAGE) or 1
        bits = int(math.ceil(math.log(pages, 2)))
        entries = 2**bits
        debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))

        unlink(outfilename)
        with atomically_replaced_file(outfilename, 'wb') as f:
            f.write('MIDX')
            f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
            assert(f.tell() == 12)

            f.truncate(12 + 4*entries + 20*total + 4*total)
            f.flush()
            fdatasync(f.fileno())

            fmap = mmap_readwrite(f, close=False)

            count = merge_into(fmap, bits, total, inp)
            del fmap # Assume this calls msync() now.
            f.seek(0, os.SEEK_END)
            f.write('\0'.join(allfilenames))
    finally:
        for ix in midxs:
            if isinstance(ix, midx.PackMidx):
                ix.close()
        midxs = None
        inp = None


    # This is just for testing (if you enable this, don't clear inp above)
    if 0:
        p = midx.PackMidx(outfilename)
        assert(len(p.idxnames) == len(infilenames))
        print p.idxnames
        assert(len(p) == total)
        for pe, e in p, git.idxmerge(inp, final_progress=False):
            pin = pi.next()
            assert(i == pin)
            assert(p.exists(i))

    return total, outfilename