Beispiel #1
0
    def _end(self, run_midx=True):
        f = self.file
        if not f: return None
        self.file = None
        try:
            self.objcache = None
            idx = self.idx
            self.idx = None

            # update object count
            f.seek(8)
            cp = struct.pack('!i', self.count)
            assert (len(cp) == 4)
            f.write(cp)

            # calculate the pack sha1sum
            f.seek(0)
            sum = Sha1()
            for b in chunkyreader(f):
                sum.update(b)
            packbin = sum.digest()
            f.write(packbin)
            fdatasync(f.fileno())
        finally:
            f.close()

        obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx,
                                               packbin)

        nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')
        try:
            os.fsync(self.parentfd)
        except OSError as e:
            # Ignore EINVAL (*only*) since some fs don't support this (e.g. cifs).
            if e.errno != errno.EINVAL:
                raise
        finally:
            os.close(self.parentfd)

        if run_midx:
            auto_midx(repo('objects/pack'))

        if self.on_pack_finish:
            self.on_pack_finish(nameprefix)

        return nameprefix
Beispiel #2
0
    def _end(self, run_midx=True):
        f = self.file
        if not f: return None
        self.file = None
        try:
            self.objcache = None
            idx = self.idx
            self.idx = None

            # update object count
            f.seek(8)
            cp = struct.pack('!i', self.count)
            assert (len(cp) == 4)
            f.write(cp)

            # calculate the pack sha1sum
            f.seek(0)
            sum = Sha1()
            for b in chunkyreader(f):
                sum.update(b)
            packbin = sum.digest()
            f.write(packbin)
            fdatasync(f.fileno())
        finally:
            f.close()

        obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx,
                                               packbin)
        nameprefix = os.path.join(self.repo_dir,
                                  'objects/pack/pack-' + obj_list_sha)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')
        try:
            os.fsync(self.parentfd)
        except:
            pass
        finally:
            os.close(self.parentfd)

        if run_midx:
            auto_midx(os.path.join(self.repo_dir, 'objects/pack'))

        if self.on_pack_finish:
            self.on_pack_finish(nameprefix)

        return nameprefix
Beispiel #3
0
    def write(self, filename, packbin):
        ofs64_count = 0
        for section in self.idx:
            for entry in section:
                if entry[2] >= 2**31:
                    ofs64_count += 1

        # Length: header + fan-out + shas-and-crcs + overflow-offsets
        index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
        idx_map = None
        idx_f = open(filename, 'w+b')
        try:
            idx_f.truncate(index_len)
            fdatasync(idx_f.fileno())
            idx_map = mmap_readwrite(idx_f, close=False)
            try:
                count = _helpers.write_idx(filename, idx_map, self.idx,
                                           self.count)
                assert (count == self.count)
                idx_map.flush()
            finally:
                idx_map.close()
        finally:
            idx_f.close()

        idx_f = open(filename, 'a+b')
        try:
            idx_f.write(packbin)
            idx_f.seek(0)
            idx_sum = Sha1()
            b = idx_f.read(8 + 4 * 256)
            idx_sum.update(b)

            obj_list_sum = Sha1()
            for b in chunkyreader(idx_f, 20 * self.count):
                idx_sum.update(b)
                obj_list_sum.update(b)
            namebase = hexlify(obj_list_sum.digest())

            for b in chunkyreader(idx_f):
                idx_sum.update(b)
            idx_f.write(idx_sum.digest())
            fdatasync(idx_f.fileno())
            return namebase
        finally:
            idx_f.close()
Beispiel #4
0
    def _write_pack_idx_v2(self, filename, idx, packbin):
        ofs64_count = 0
        for section in idx:
            for entry in section:
                if entry[2] >= 2**31:
                    ofs64_count += 1

        # Length: header + fan-out + shas-and-crcs + overflow-offsets
        index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
        idx_map = None
        idx_f = open(filename, 'w+b')
        try:
            idx_f.truncate(index_len)
            fdatasync(idx_f.fileno())
            idx_map = mmap_readwrite(idx_f, close=False)
            try:
                count = _helpers.write_idx(filename, idx_map, idx, self.count)
                assert(count == self.count)
                idx_map.flush()
            finally:
                idx_map.close()
        finally:
            idx_f.close()

        idx_f = open(filename, 'a+b')
        try:
            idx_f.write(packbin)
            idx_f.seek(0)
            idx_sum = Sha1()
            b = idx_f.read(8 + 4*256)
            idx_sum.update(b)

            obj_list_sum = Sha1()
            for b in chunkyreader(idx_f, 20*self.count):
                idx_sum.update(b)
                obj_list_sum.update(b)
            namebase = obj_list_sum.hexdigest()

            for b in chunkyreader(idx_f):
                idx_sum.update(b)
            idx_f.write(idx_sum.digest())
            fdatasync(idx_f.fileno())
            return namebase
        finally:
            idx_f.close()
Beispiel #5
0
    def _end(self, run_midx=True):
        f = self.file
        if not f: return None
        self.file = None
        try:
            self.objcache = None
            idx = self.idx
            self.idx = None

            # update object count
            f.seek(8)
            cp = struct.pack('!i', self.count)
            assert(len(cp) == 4)
            f.write(cp)

            # calculate the pack sha1sum
            f.seek(0)
            sum = Sha1()
            for b in chunkyreader(f):
                sum.update(b)
            packbin = sum.digest()
            f.write(packbin)
            fdatasync(f.fileno())
        finally:
            f.close()

        obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)

        nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')
        try:
            os.fsync(self.parentfd)
        finally:
            os.close(self.parentfd)

        if run_midx:
            auto_midx(repo('objects/pack'))

        if self.on_pack_finish:
            self.on_pack_finish(nameprefix)

        return nameprefix
Beispiel #6
0
Datei: midx.py Projekt: bup/bup
def _do_midx(outdir, outfilename, infilenames, prefixstr,
             auto=False, force=False):
    global _first
    if not outfilename:
        assert(outdir)
        sum = hexlify(Sha1(b'\0'.join(infilenames)).digest())
        outfilename = b'%s/midx-%s.midx' % (outdir, sum)
    
    inp = []
    total = 0
    allfilenames = []
    midxs = []
    try:
        for name in infilenames:
            ix = git.open_idx(name)
            midxs.append(ix)
            inp.append((
                ix.map,
                len(ix),
                ix.sha_ofs,
                isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
                len(allfilenames),
            ))
            for n in ix.idxnames:
                allfilenames.append(os.path.basename(n))
            total += len(ix)
        inp.sort(reverse=True, key=lambda x: x[0][x[2] : x[2] + 20])

        if not _first: _first = outdir
        dirprefix = (_first != outdir) and git.repo_rel(outdir) + b': ' or b''
        debug1('midx: %s%screating from %d files (%d objects).\n'
               % (dirprefix, prefixstr, len(infilenames), total))
        if (auto and (total < 1024 and len(infilenames) < 3)) \
           or ((auto or force) and len(infilenames) < 2) \
           or (force and not total):
            debug1('midx: nothing to do.\n')
            return

        pages = int(total/SHA_PER_PAGE) or 1
        bits = int(math.ceil(math.log(pages, 2)))
        entries = 2**bits
        debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))

        unlink(outfilename)
        with atomically_replaced_file(outfilename, 'wb') as f:
            f.write(b'MIDX')
            f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
            assert(f.tell() == 12)

            f.truncate(12 + 4*entries + 20*total + 4*total)
            f.flush()
            fdatasync(f.fileno())

            fmap = mmap_readwrite(f, close=False)
            count = merge_into(fmap, bits, total, inp)
            del fmap # Assume this calls msync() now.
            f.seek(0, os.SEEK_END)
            f.write(b'\0'.join(allfilenames))
    finally:
        for ix in midxs:
            if isinstance(ix, midx.PackMidx):
                ix.close()
        midxs = None
        inp = None


    # This is just for testing (if you enable this, don't clear inp above)
    if 0:
        p = midx.PackMidx(outfilename)
        assert(len(p.idxnames) == len(infilenames))
        log(repr(p.idxnames) + '\n')
        assert(len(p) == total)
        for pe, e in p, git.idxmerge(inp, final_progress=False):
            pin = next(pi)
            assert(i == pin)
            assert(p.exists(i))

    return total, outfilename
Beispiel #7
0
def _do_midx(outdir, outfilename, infilenames, prefixstr):
    global _first
    if not outfilename:
        assert(outdir)
        sum = Sha1('\0'.join(infilenames)).hexdigest()
        outfilename = '%s/midx-%s.midx' % (outdir, sum)
    
    inp = []
    total = 0
    allfilenames = []
    midxs = []
    try:
        for name in infilenames:
            ix = git.open_idx(name)
            midxs.append(ix)
            inp.append((
                ix.map,
                len(ix),
                ix.sha_ofs,
                isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
                len(allfilenames),
            ))
            for n in ix.idxnames:
                allfilenames.append(os.path.basename(n))
            total += len(ix)
        inp.sort(lambda x,y: cmp(str(y[0][y[2]:y[2]+20]),str(x[0][x[2]:x[2]+20])))

        if not _first: _first = outdir
        dirprefix = (_first != outdir) and git.repo_rel(outdir)+': ' or ''
        debug1('midx: %s%screating from %d files (%d objects).\n'
               % (dirprefix, prefixstr, len(infilenames), total))
        if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
           or ((opt.auto or opt.force) and len(infilenames) < 2) \
           or (opt.force and not total):
            debug1('midx: nothing to do.\n')
            return

        pages = int(total/SHA_PER_PAGE) or 1
        bits = int(math.ceil(math.log(pages, 2)))
        entries = 2**bits
        debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))

        unlink(outfilename)
        with atomically_replaced_file(outfilename, 'wb') as f:
            f.write('MIDX')
            f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
            assert(f.tell() == 12)

            f.truncate(12 + 4*entries + 20*total + 4*total)
            f.flush()
            fdatasync(f.fileno())

            fmap = mmap_readwrite(f, close=False)

            count = merge_into(fmap, bits, total, inp)
            del fmap # Assume this calls msync() now.
            f.seek(0, os.SEEK_END)
            f.write('\0'.join(allfilenames))
    finally:
        for ix in midxs:
            if isinstance(ix, midx.PackMidx):
                ix.close()
        midxs = None
        inp = None


    # This is just for testing (if you enable this, don't clear inp above)
    if 0:
        p = midx.PackMidx(outfilename)
        assert(len(p.idxnames) == len(infilenames))
        print p.idxnames
        assert(len(p) == total)
        for pe, e in p, git.idxmerge(inp, final_progress=False):
            pin = pi.next()
            assert(i == pin)
            assert(p.exists(i))

    return total, outfilename