Esempio n. 1
0
def open_idx(filename):
    """Open an index file and return the object that wraps it.

    filename: path ending in '.idx' (a single pack index) or '.midx'
        (handed to midx.PackMidx).

    Returns a PackIdxV2 when the file starts with the '\\377tOc' magic
    followed by version 2, a PackIdxV1 when the complete 8-byte header
    sorts below that magic, or a midx.PackMidx for '.midx' names.

    Raises GitError for an unsupported version, an unrecognized or
    truncated header, or a filename with neither extension.
    """
    if filename.endswith('.idx'):
        f = open(filename, 'rb')
        try:
            header = f.read(8)
            magic = header[0:4]
            # The file is read in binary mode, so compare against a bytes
            # literal (same object type as before on Python 2).  Requiring
            # the full 8 bytes keeps a truncated file from surfacing as
            # struct.error instead of GitError.
            if len(header) == 8 and magic == b'\377tOc':
                version = struct.unpack('!I', header[4:8])[0]
                if version == 2:
                    return PackIdxV2(filename, f)
                raise GitError('%s: expected idx file version 2, got %d'
                               % (filename, version))
            if len(header) == 8 and magic < b'\377tOc':
                return PackIdxV1(filename, f)
            raise GitError('%s: unrecognized idx file header' % filename)
        except BaseException:
            # On success the open file is passed to the index object; on
            # any failure nobody else holds it, so close it here.
            f.close()
            raise
    elif filename.endswith('.midx'):
        return midx.PackMidx(filename)
    else:
        raise GitError('idx filenames must end with .idx or .midx')
Esempio n. 2
0
File: git.py Progetto: zzmjohn/bup
    def refresh(self, skip_midx=False):
        """Refresh the index list.

        Rebuilds self.packs from the *.midx and *.idx files found in
        self.dir and reopens the bloom filter (bup.bloom) if one exists.

        This method verifies if .midx files were superseded (e.g. all of its
        contents are in another, bigger .midx file) and removes the superseded
        files.  A .midx that refers to an index file which no longer exists
        is also deleted, after logging a warning.

        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.

        The instance variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        if self.bloom is not None:
            self.bloom.close()
        self.bloom = None  # Always reopen the bloom as it may have been replaced
        self.do_bloom = False
        skip_midx = skip_midx or self.ignore_midx
        # d maps a path to the index object that covers it.  It is seeded
        # with the currently open packs (minus midxes when skipping them).
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, midx.PackMidx))
        if os.path.exists(self.dir):
            if not skip_midx:
                midxl = []
                midxes = set(glob.glob(os.path.join(self.dir, b'*.midx')))
                # remove any *.midx files from our list that no longer exist
                for ix in list(d.values()):
                    if not isinstance(ix, midx.PackMidx):
                        continue
                    if ix.name in midxes:
                        continue
                    # remove the midx
                    del d[ix.name]
                    ix.close()
                    self.packs.remove(ix)
                # Every idx named by a still-open midx is covered by it.
                for ix in self.packs:
                    if isinstance(ix, midx.PackMidx):
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                # Open midx files we don't know yet; discard broken ones.
                for full in midxes:
                    if not d.get(full):
                        mx = midx.PackMidx(full)
                        (mxd, mxf) = os.path.split(mx.name)
                        broken = False
                        for n in mx.idxnames:
                            if not os.path.exists(os.path.join(mxd, n)):
                                log(('warning: index %s missing\n'
                                     '  used by %s\n') %
                                    (path_msg(n), path_msg(mxf)))
                                broken = True
                        if broken:
                            mx.close()
                            del mx
                            unlink(full)
                        else:
                            midxl.append(mx)
                # Consider the largest (then most recently modified) first,
                # so smaller midxes fully covered by them become redundant.
                midxl.sort(
                    key=lambda ix: (-len(ix), -xstat.stat(ix.name).st_mtime))
                for ix in midxl:
                    any_needed = False
                    for sub in ix.idxnames:
                        found = d.get(os.path.join(self.dir, sub))
                        if not found or isinstance(found, PackIdx):
                            # doesn't exist, or exists but not in a midx
                            any_needed = True
                            break
                    if any_needed:
                        d[ix.name] = ix
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                    elif not ix.force_keep:
                        debug1('midx: removing redundant: %s\n' %
                               path_msg(os.path.basename(ix.name)))
                        ix.close()
                        unlink(ix.name)
            # Open any plain .idx not already covered by an entry in d;
            # unreadable ones are recorded via add_error() and skipped.
            for full in glob.glob(os.path.join(self.dir, b'*.idx')):
                if not d.get(full):
                    try:
                        ix = open_idx(full)
                    except GitError as e:
                        add_error(e)
                        continue
                    d[full] = ix
            bfull = os.path.join(self.dir, b'bup.bloom')
            if self.bloom is None and os.path.exists(bfull):
                self.bloom = bloom.ShaBloom(bfull)
            # Several keys in d may share one object, hence the set().
            self.packs = list(set(d.values()))
            self.packs.sort(reverse=True, key=lambda x: len(x))
            if self.bloom and self.bloom.valid() and len(
                    self.bloom) >= len(self):
                self.do_bloom = True
            else:
                # The bloom is unusable (invalid or covering fewer objects
                # than we have); close it rather than just dropping the
                # reference, mirroring the explicit close() at the top.
                if self.bloom is not None:
                    stale, self.bloom = self.bloom, None
                    stale.close()
        debug1('PackIdxList: using %d index%s.\n' %
               (len(self.packs), 'es' if len(self.packs) != 1 else ''))
Esempio n. 3
0
def _do_midx(outdir, outfilename, infilenames, prefixstr):
    """Merge the given index files into a single .midx file.

    outdir: directory used to derive the output name when outfilename is
        empty, and to build the prefix of the debug message.
    outfilename: path of the .midx to create.  When falsy, the name
        '<outdir>/midx-<hex>.midx' is used, where <hex> is the Sha1 hex
        digest of the NUL-joined input names.
    infilenames: paths of the indexes to merge; each is opened with
        git.open_idx().
    prefixstr: text inserted into the 'creating from' debug message.

    Returns (total, outfilename), total being the summed len() of all
    inputs, or None when the opt.auto / opt.force thresholds say there is
    nothing to do.

    The data is written to '<outfilename>.tmp' and renamed into place at
    the end; if anything fails after the temporary file was created it is
    closed and removed before the exception propagates.
    """
    global _first
    if not outfilename:
        assert (outdir)
        digest = Sha1('\0'.join(infilenames)).hexdigest()
        outfilename = '%s/midx-%s.midx' % (outdir, digest)
    tmpname = outfilename + '.tmp'

    inp = []
    total = 0
    allfilenames = []
    midxs = []
    f = None
    complete = False
    try:
        try:
            for name in infilenames:
                ix = git.open_idx(name)
                midxs.append(ix)
                inp.append((
                    ix.map,
                    len(ix),
                    ix.sha_ofs,
                    isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
                    len(allfilenames),
                ))
                for n in ix.idxnames:
                    allfilenames.append(os.path.basename(n))
                total += len(ix)
            # Order the inputs by the 20 bytes found at their sha_ofs,
            # highest first.  key= with reverse=True is stable, so ties
            # keep the same relative order as the former cmp-based sort.
            inp.sort(key=lambda e: bytes(e[0][e[2]:e[2] + 20]),
                     reverse=True)

            if not _first: _first = outdir
            dirprefix = git.repo_rel(outdir) + ': ' if _first != outdir else ''
            debug1('midx: %s%screating from %d files (%d objects).\n' %
                   (dirprefix, prefixstr, len(infilenames), total))
            if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
               or ((opt.auto or opt.force) and len(infilenames) < 2) \
               or (opt.force and not total):
                debug1('midx: nothing to do.\n')
                return

            pages = int(total / SHA_PER_PAGE) or 1
            bits = int(math.ceil(math.log(pages, 2)))
            entries = 2**bits
            debug1('midx: table size: %d (%d bits)\n' % (entries * 4, bits))

            unlink(outfilename)
            f = open(tmpname, 'w+b')
            f.write('MIDX')
            f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
            assert (f.tell() == 12)

            # Reserve room for the header (12 bytes), the lookup table
            # (4 bytes per entry) and 20 + 4 bytes per object.
            f.truncate(12 + 4 * entries + 20 * total + 4 * total)
            f.flush()
            fdatasync(f.fileno())

            fmap = mmap_readwrite(f, close=False)

            merge_into(fmap, bits, total, inp)
            del fmap  # Assume this calls msync() now.
        finally:
            for ix in midxs:
                if isinstance(ix, midx.PackMidx):
                    ix.close()
            midxs = None
            inp = None

        # The list of merged index names goes after the preallocated area.
        f.seek(0, os.SEEK_END)
        f.write('\0'.join(allfilenames))
        complete = True
    finally:
        if f is not None:
            f.close()
            if not complete:
                # Don't leave a half-written temporary file behind.
                unlink(tmpname)
    os.rename(tmpname, outfilename)

    return total, outfilename