def move_in_thin_pack(self, path):
        """Move a specific file containing a pack into the pack directory.

        :note: The file should be on the same file system as the
            packs directory.

        :param path: Path to the pack file.
        """
        data = PackData(path)

        # Write an index for the thin pack so it can be opened as a Pack.
        # Use os.urandom for temporary-name entropy instead of the private,
        # undocumented urllib2.randombytes helper.
        temppath = os.path.join(self.pack_dir,
            sha_to_hex(os.urandom(20))+".tempidx")
        data.create_index_v2(temppath, self.get_raw)
        p = Pack.from_objects(data, load_pack_index(temppath))

        # Write a full (self-contained) pack, resolving external refs
        # through self.get_raw.
        temppath = os.path.join(self.pack_dir,
            sha_to_hex(os.urandom(20))+".temppack")
        write_pack(temppath, ((o, None) for o in p.iterobjects(self.get_raw)),
                len(p))
        pack_sha = load_pack_index(temppath+".idx").objects_sha1()
        newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
        os.rename(temppath+".pack", newbasename+".pack")
        os.rename(temppath+".idx", newbasename+".idx")
        self._add_known_pack(newbasename)
Example #2
0
    def move_in_thin_pack(self, path):
        """Move a specific file containing a pack into the pack directory.

        :note: The file should be on the same file system as the
            packs directory.

        :param path: Path to the pack file.
        """
        data = PackData(path)

        # Write an index for the thin pack so it can be opened as a Pack.
        # Use os.urandom for temporary-name entropy instead of the private,
        # undocumented urllib2.randombytes helper.
        temppath = os.path.join(self.pack_dir,
            sha_to_hex(os.urandom(20))+".tempidx")
        data.create_index_v2(temppath, self.get_raw)
        p = Pack.from_objects(data, load_pack_index(temppath))

        # Write a full (self-contained) pack, resolving external refs
        # through self.get_raw.
        temppath = os.path.join(self.pack_dir,
            sha_to_hex(os.urandom(20))+".temppack")
        write_pack(temppath, ((o, None) for o in p.iterobjects(self.get_raw)),
                len(p))
        pack_sha = load_pack_index(temppath+".idx").objects_sha1()
        newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
        os.rename(temppath+".pack", newbasename+".pack")
        os.rename(temppath+".idx", newbasename+".idx")
        self._add_known_pack(newbasename)
Example #3
0
 def test_get_stored_checksum(self):
     """Stored index checksum and pack checksum match pack1's known values."""
     index = self.get_pack_index(pack1_sha)
     stored = sha_to_hex(index.get_stored_checksum())
     packed = sha_to_hex(index.get_pack_checksum())
     self.assertEqual(b"f2848e2ad16f329ae1c92e3b95e91888daa5bd01", stored)
     self.assertEqual(b"721980e866af9a5f93ad674144e1459b8ba3e7b7", packed)
Example #4
0
 def data(self):
     """The pack data object being used.

     Lazily constructs a PackData from ``self._data_path`` on first access
     and verifies it against the index before caching it.

     :raise ChecksumMismatch: if the checksum recorded in the index does
         not match the checksum stored in the pack data trailer.
     """
     if self._data is None:
         self._data = PackData(self._data_path)
         # Sanity check: index and data must describe the same object count.
         assert len(self.index) == len(self._data)
         idx_stored_checksum = self.index.get_pack_checksum()
         data_stored_checksum = self._data.get_stored_checksum()
         if idx_stored_checksum != data_stored_checksum:
             raise ChecksumMismatch(sha_to_hex(idx_stored_checksum), 
                                    sha_to_hex(data_stored_checksum))
     return self._data
Example #5
0
 def data(self):
     """The pack data object being used.

     Loads and checksum-verifies the pack data on first access, then
     returns the cached instance on subsequent calls.
     """
     if self._data is not None:
         return self._data
     self._data = PackData(self._data_path)
     assert len(self.index) == len(self._data)
     from_index = self.index.get_pack_checksum()
     from_data = self._data.get_stored_checksum()
     if from_index != from_data:
         raise ChecksumMismatch(sha_to_hex(from_index),
                                sha_to_hex(from_data))
     return self._data
Example #6
0
    def from_thinpack(cls, pack_store, f, indexer, resolve_ext_ref):
        """Complete a thin pack and persist it to the GAE blobstore.

        :param pack_store: datastore entity describing the pack
        :param f: file-like object holding the thin pack contents
        :param indexer: iterable of (sha, offset, crc32) entries; also
            exposes ext_refs() listing external (missing) delta bases
        :param resolve_ext_ref: callback returning (type_num, data) for an
            external ref sha
        :return: the completed Pack, after length/checksum verification
        """
        entries = list(indexer)

        # Update the header with the new number of objects.
        f.seek(0)
        write_pack_header(f, len(entries) + len(indexer.ext_refs()))

        # Rescan the rest of the pack, computing the SHA with the new header.
        new_sha = compute_file_sha(f, end_ofs=-20)

        # Complete the pack: append every external base object to it.
        for ext_sha in indexer.ext_refs():
            assert len(ext_sha) == 20
            type_num, data = resolve_ext_ref(ext_sha)
            offset = f.tell()
            crc32 = write_pack_object(f, type_num, data, sha=new_sha)
            entries.append((ext_sha, offset, crc32))
        pack_sha = new_sha.digest()
        f.write(pack_sha)
        #f.close()

        # Write the pack bytes to a new blobstore blob.
        blob_name = files.blobstore.create(
            mime_type='application/octet-stream')
        with files.open(blob_name, 'a') as blob:
            blob.write(f.getvalue())
        files.finalize(blob_name)

        # Store pack metadata on the datastore entity.
        pack_store.data = files.blobstore.get_blob_key(blob_name)
        #pack_store.sha1 #sha's are added when writing the index
        pack_store.size = f.tell()
        pack_store.checksum = sha_to_hex(pack_sha)
        pack_store.save()

        # Write the index (one PackStoreIndex row per object).
        pack_indexes = [pack_store]
        for (name, offset, entry_checksum) in entries:
            idx = PackStoreIndex(packref=pack_store,
                                 sha=sha_to_hex(name),
                                 offset=offset,
                                 crc32=entry_checksum)
            pack_store.sha1.append(sha_to_hex(name))
            pack_indexes.append(idx)
        db.save(pack_indexes)

        # Add the pack to the store and return it.
        final_pack = Pack(pack_store)
        final_pack.check_length_and_checksum()
        return final_pack
Example #7
0
    def read_objects(self):
        """Read the objects in this pack file.

        :yield: tuples of (type number, uncompressed data, compressed length)
            for each object in the pack.
        :raise AssertionError: if there is an error in the pack format.
        :raise ChecksumMismatch: if the checksum of the pack contents does not
            match the checksum in the pack trailer.
        :raise zlib.error: if an error occurred during zlib decompression.
        :raise IOError: if an error occurred writing to the output file.
        """
        pack_version, self._num_objects = read_pack_header(self.read)
        for i in xrange(self._num_objects):
            # obj_type instead of `type` to avoid shadowing the builtin.
            obj_type, uncomp, comp_len, unused = unpack_object(self.read,
                                                               self.recv)
            yield obj_type, uncomp, comp_len

            # Prepend any unused data to the current read buffer.
            buf = StringIO()
            buf.write(unused)
            buf.write(self._rbuf.read())
            buf.seek(0)
            self._rbuf = buf

        # ''.join accepts the trailer sequence directly; a char-by-char
        # list comprehension is redundant.
        pack_sha = sha_to_hex(''.join(self._trailer))
        calculated_sha = self.sha.hexdigest()
        if pack_sha != calculated_sha:
            raise ChecksumMismatch(pack_sha, calculated_sha)
Example #8
0
    def test_add_thin_pack(self):
        """Adding a thin pack resolves its REF_DELTA against stored objects.

        The store already holds the delta base, so add_thin_pack must
        complete the pack and expose both objects as packed.
        """
        o = DiskObjectStore(self.store_dir)
        blob = make_object(Blob, data='yummy data')
        o.add_object(blob)

        f = StringIO()
        entries = build_pack(f, [
            (REF_DELTA, (blob.id, 'more yummy data')),
        ],
                             store=o)
        pack = o.add_thin_pack(f.read, None)
        try:
            # entries[0][3] is fed to sha_to_hex, i.e. the binary sha of
            # the reconstructed delta object.
            packed_blob_sha = sha_to_hex(entries[0][3])
            pack.check_length_and_checksum()
            self.assertEqual(sorted([blob.id, packed_blob_sha]), list(pack))
            self.assertTrue(o.contains_packed(packed_blob_sha))
            self.assertTrue(o.contains_packed(blob.id))
            self.assertEqual((Blob.type_num, 'more yummy data'),
                             o.get_raw(packed_blob_sha))
        finally:
            # FIXME: DiskObjectStore should have close() which do the following:
            for p in o._pack_cache or []:
                p.close()

            pack.close()
Example #9
0
 def get_raw_no_repeat(self, bin_sha):
     """Wrapper around store.get_raw that refuses repeated lookups."""
     hex_sha = sha_to_hex(bin_sha)
     already_fetched = hex_sha in self.fetched
     self.assertFalse(already_fetched,
                      'Attempted to re-fetch object %s' % hex_sha)
     self.fetched.add(hex_sha)
     return self.store.get_raw(hex_sha)
Example #10
0
 def _to_hexsha(self, sha):
     if len(sha) == 40:
         return sha
     elif len(sha) == 20:
         return sha_to_hex(sha)
     else:
         raise ValueError("Invalid sha %r" % sha)
 def _to_hexsha(self, sha):
     if len(sha) == 40:
         return sha
     elif len(sha) == 20:
         return sha_to_hex(sha)
     else:
         raise ValueError("Invalid sha %r" % (sha,))
Example #12
0
 def test_iterentries(self):
     """iterentries yields (hex sha, offset, crc32) for each pack1 object."""
     index = self.get_pack_index(pack1_sha)
     found = []
     for sha, offset, crc in index.iterentries():
         found.append((sha_to_hex(sha), offset, crc))
     expected = [
         ('6f670c0fb53f9463760b7295fbb814e965fb20c8', 178, None),
         ('b2a2766a2879c209ab1176e7e778b81ae422eeaa', 138, None),
         ('f18faa16531ac570a3fdc8c7ca16682548dafd12', 12, None),
     ]
     self.assertEqual(expected, found)
 def get_raw(self, name):
     """Obtain the raw text for an object.
     
     :param name: sha for the object (40-char hex or 20-byte binary).
     :return: tuple with object type and object contents.
     :raise AssertionError: if *name* is neither 40 nor 20 characters long.
     :raise KeyError: if the object is not found packed or loose.
     """
     if len(name) == 40:
         sha = hex_to_sha(name)
         hexsha = name
     elif len(name) == 20:
         sha = name
         hexsha = None
     else:
         raise AssertionError
     # Packs are indexed by binary sha.
     for pack in self.packs:
         try:
             return pack.get_raw(sha)
         except KeyError:
             pass
     # Loose objects are addressed by hex sha.
     if hexsha is None: 
         hexsha = sha_to_hex(name)
     ret = self._get_shafile(hexsha)
     if ret is not None:
         return ret.type, ret.as_raw_string()
     raise KeyError(hexsha)
Example #14
0
    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        hexsha = None
        if len(name) == 40:
            sha = hex_to_sha(name)
            hexsha = name
        elif len(name) == 20:
            sha = name
        else:
            raise AssertionError("Invalid object name %r" % name)
        # Packed objects first; packs are indexed by binary sha.
        for pack in self.packs:
            try:
                return pack.get_raw(sha)
            except KeyError:
                continue
        # Fall back to loose objects, addressed by hex sha.
        if hexsha is None:
            hexsha = sha_to_hex(name)
        loose = self._get_loose_object(hexsha)
        if loose is not None:
            return loose.type_num, loose.as_raw_string()
        # Finally, consult any alternate object stores.
        for alternate in self.alternates:
            try:
                return alternate.get_raw(hexsha)
            except KeyError:
                continue
        raise KeyError(hexsha)
Example #15
0
 def get_raw(self, name):
     """Obtain the raw text for an object.

     :param name: sha for the object.
     :return: tuple with object type and object contents.
     """
     size = len(name)
     if size == 40:
         sha = hex_to_sha(name)
         hexsha = name
     elif size == 20:
         sha = name
         hexsha = None
     else:
         raise AssertionError
     # Try every pack before touching loose storage.
     for pack in self.packs:
         try:
             return pack.get_raw(sha)
         except KeyError:
             continue
     if hexsha is None:
         hexsha = sha_to_hex(name)
     obj = self._get_shafile(hexsha)
     if obj is not None:
         return obj.type, obj.as_raw_string()
     raise KeyError(hexsha)
Example #16
0
    def test_add_thin_pack(self):
        """A thin pack's REF_DELTA is completed against the stored base."""
        o = DiskObjectStore(self.store_dir)
        try:
            blob = make_object(Blob, data=b"yummy data")
            o.add_object(blob)

            f = BytesIO()
            entries = build_pack(
                f,
                [
                    (REF_DELTA, (blob.id, b"more yummy data")),
                ],
                store=o,
            )

            # add_thin_pack returns a Pack usable as a context manager.
            with o.add_thin_pack(f.read, None) as pack:
                packed_blob_sha = sha_to_hex(entries[0][3])
                pack.check_length_and_checksum()
                self.assertEqual(sorted([blob.id, packed_blob_sha]),
                                 list(pack))
                self.assertTrue(o.contains_packed(packed_blob_sha))
                self.assertTrue(o.contains_packed(blob.id))
                self.assertEqual(
                    (Blob.type_num, b"more yummy data"),
                    o.get_raw(packed_blob_sha),
                )
        finally:
            o.close()
Example #17
0
 def get_raw_no_repeat(self, bin_sha):
     """Wrapper around store.get_raw that doesn't allow repeat lookups.

     :param bin_sha: binary (20-byte) object sha.
     :return: result of ``self.store.get_raw`` for the hex form of the sha.
     """
     hex_sha = sha_to_hex(bin_sha)
     # Fail the test if this object was already fetched once.
     self.assertFalse(hex_sha in self.fetched,
                      'Attempted to re-fetch object %s' % hex_sha)
     self.fetched.add(hex_sha)
     return self.store.get_raw(hex_sha)
Example #18
0
    def get_raw(self, name):
        """Obtain the raw fulltext for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        :raise KeyError: if the object is not found in packs, loose
            storage, or any alternate store.
        """
        if len(name) == 40:
            sha = hex_to_sha(name)
            hexsha = name
        elif len(name) == 20:
            sha = name
            hexsha = None
        else:
            raise AssertionError("Invalid object name %r" % name)
        # Packed objects are looked up by binary sha.
        for pack in self.packs:
            try:
                return pack.get_raw(sha)
            except KeyError:
                pass
        # Loose objects are addressed by hex sha.
        if hexsha is None:
            hexsha = sha_to_hex(name)
        ret = self._get_loose_object(hexsha)
        if ret is not None:
            return ret.type_num, ret.as_raw_string()
        # Finally, consult alternate object stores.
        for alternate in self.alternates:
            try:
                return alternate.get_raw(hexsha)
            except KeyError:
                pass
        raise KeyError(hexsha)
Example #19
0
    def from_thinpack(cls, pack_store, f, indexer, resolve_ext_ref):
        """Complete a thin pack and persist it to the GAE blobstore.

        :param pack_store: datastore entity describing the pack
        :param f: file-like object holding the thin pack contents
        :param indexer: iterable of (sha, offset, crc32) entries; also
            exposes ext_refs() listing external (missing) delta bases
        :param resolve_ext_ref: callback returning (type_num, data) for an
            external ref sha
        :return: the completed Pack, after length/checksum verification
        """
        entries = list(indexer)

        # Update the header with the new number of objects.
        f.seek(0)
        write_pack_header(f, len(entries) + len(indexer.ext_refs()))

        # Rescan the rest of the pack, computing the SHA with the new header.
        new_sha = compute_file_sha(f, end_ofs=-20)

        # Complete the pack: append every external base object to it.
        for ext_sha in indexer.ext_refs():
            assert len(ext_sha) == 20
            type_num, data = resolve_ext_ref(ext_sha)
            offset = f.tell()
            crc32 = write_pack_object(f, type_num, data, sha=new_sha)
            entries.append((ext_sha, offset, crc32))
        pack_sha = new_sha.digest()
        f.write(pack_sha)
        # f.close()

        # Write the pack bytes to a new blobstore blob.
        blob_name = files.blobstore.create(mime_type="application/octet-stream")
        with files.open(blob_name, "a") as blob:
            blob.write(f.getvalue())
        files.finalize(blob_name)

        # Store pack metadata on the datastore entity.
        pack_store.data = files.blobstore.get_blob_key(blob_name)
        # pack_store.sha1 #sha's are added when writing the index
        pack_store.size = f.tell()
        pack_store.checksum = sha_to_hex(pack_sha)
        pack_store.save()

        # Write the index (one PackStoreIndex row per object).
        pack_indexes = [pack_store]
        for (name, offset, entry_checksum) in entries:
            idx = PackStoreIndex(packref=pack_store, sha=sha_to_hex(name), offset=offset, crc32=entry_checksum)
            pack_store.sha1.append(sha_to_hex(name))
            pack_indexes.append(idx)
        db.save(pack_indexes)

        # Add the pack to the store and return it.
        final_pack = Pack(pack_store)
        final_pack.check_length_and_checksum()
        return final_pack
Example #20
0
 def test_iterentries(self):
     """iterentries yields (hex sha, offset, crc32) tuples for pack1."""
     p = self.get_pack_index(pack1_sha)
     entries = [(sha_to_hex(s), o, c) for s, o, c in p.iterentries()]
     # assertEqual: assertEquals is a deprecated alias.
     self.assertEqual([
       ('6f670c0fb53f9463760b7295fbb814e965fb20c8', 178, None),
       ('b2a2766a2879c209ab1176e7e778b81ae422eeaa', 138, None),
       ('f18faa16531ac570a3fdc8c7ca16682548dafd12', 12, None)
       ], entries)
Example #21
0
 def test_iterentries(self):
     p = self.get_pack_data(pack1_sha)
     entries = set((sha_to_hex(s), o, c) for s, o, c in p.iterentries())
     self.assertEquals(set([
       ('6f670c0fb53f9463760b7295fbb814e965fb20c8', 178, 1373561701L),
       ('b2a2766a2879c209ab1176e7e778b81ae422eeaa', 138, 912998690L),
       ('f18faa16531ac570a3fdc8c7ca16682548dafd12', 12, 3775879613L),
       ]), entries)
Example #22
0
 def test_iterentries(self):
     """iterentries on pack data yields (hex sha, offset, crc32) tuples."""
     expected = set([
         ('6f670c0fb53f9463760b7295fbb814e965fb20c8', 178, 1373561701),
         ('b2a2766a2879c209ab1176e7e778b81ae422eeaa', 138, 912998690),
         ('f18faa16531ac570a3fdc8c7ca16682548dafd12', 12, 3775879613),
     ])
     with self.get_pack_data(pack1_sha) as p:
         found = set((sha_to_hex(s), o, c) for s, o, c in p.iterentries())
         self.assertEqual(expected, found)
Example #23
0
    def get_raw(self, sha):
        """Get the raw representation of a Git object by SHA1.

        :param sha: SHA1 of the git object
        """
        # Binary (20-byte) shas are converted to hex before lookup.
        hexsha = sha_to_hex(sha) if len(sha) == 20 else sha
        obj = self[hexsha]
        return (obj.type, obj.as_raw_string())
Example #24
0
    def test_add_thin_pack(self):
        """A thin pack's REF_DELTA resolves against the in-memory store."""
        store = MemoryObjectStore()
        base = make_object(Blob, data=b'yummy data')
        store.add_object(base)

        buf = BytesIO()
        entries = build_pack(buf, [
            (REF_DELTA, (base.id, b'more yummy data')),
            ], store=store)
        store.add_thin_pack(buf.read, None)
        delta_sha = sha_to_hex(entries[0][3])
        self.assertEqual((Blob.type_num, b'more yummy data'),
                         store.get_raw(delta_sha))
Example #25
0
    def test_add_thin_pack(self):
        """A thin pack's REF_DELTA resolves against the in-memory store."""
        o = MemoryObjectStore()
        blob = make_object(Blob, data=b'yummy data')
        o.add_object(blob)

        f = BytesIO()
        entries = build_pack(f, [
            (REF_DELTA, (blob.id, b'more yummy data')),
            ], store=o)
        o.add_thin_pack(f.read, None)
        # entries[0][3] is fed to sha_to_hex, i.e. the binary sha of the
        # reconstructed delta object.
        packed_blob_sha = sha_to_hex(entries[0][3])
        self.assertEqual((Blob.type_num, b'more yummy data'),
                         o.get_raw(packed_blob_sha))
Example #26
0
 def create(cls, pack_store, pack_data):
     """Write per-object index rows for *pack_data* into the datastore.

     :note: believed obsolete; logs an error whenever it is invoked.
     :return: a new instance of this index class wrapping *pack_store*.
     """
     # I think this is obsolete
     logging.error("UNUSED: gae_backend.py PackIndex.create()")
     for sha, offset, crc32 in pack_data.iterentries():
         sha = sha_to_hex(sha)
         pack_store.sha1.append(sha)
         PackStoreIndex(
             packref=pack_store, sha=sha, offset=offset, crc32=crc32
         ).save()  # one datastore write per entry; inefficient, but this path is deprecated
     t_checksum = pack_data.get_stored_checksum()
     pack_store.checksum = t_checksum
     pack_store.save()
     return cls(pack_store)
Example #27
0
def read_cache_entry(f):
    """Read an entry from a cache file.

    :param f: File-like object to read from
    :return: tuple with: name, ctime, mtime, device, inode, mode, uid, gid,
        size, hex sha, and flags (with the name-length bits masked out)
    """
    beginoffset = f.tell()
    ctime = read_cache_time(f)
    mtime = read_cache_time(f)
    (dev, ino, mode, uid, gid, size, sha, flags) = struct.unpack(
        ">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
    # Low 12 bits of flags hold the name length.
    name = f.read(flags & 0x0FFF)
    # Skip padding: entries are aligned to 8-byte boundaries.  The padding
    # bytes themselves are discarded (no need to keep them in a local).
    real_size = (f.tell() - beginoffset + 8) & ~7
    f.read((beginoffset + real_size) - f.tell())
    return (name, ctime, mtime, dev, ino, mode, uid, gid, size,
            sha_to_hex(sha), flags & ~0x0FFF)
Example #28
0
 def lookup_git_sha(self, sha):
     """Yield mapping information for a git sha from the index.

     :param sha: git sha; a 20-byte binary sha is converted to hex first.
     :yield: for commits, ("commit", (field1, field2, verifiers)); for
         other kinds, (kind, remaining fields as a tuple).
     """
     if len(sha) == 20:
         sha = sha_to_hex(sha)
     value = self._get_entry((b"git", sha, b"X"))
     # Entries are space-separated; split into at most 4 fields.
     data = value.split(b" ", 3)
     if data[0] == b"commit":
         try:
             if data[3]:
                 verifiers = {"testament3-sha1": data[3]}
             else:
                 verifiers = {}
         except IndexError:
             # Entry lacks the fourth (verifier) field.
             verifiers = {}
         yield ("commit", (data[1], data[2], verifiers))
     else:
         yield (data[0].decode('ascii'), tuple(data[1:]))
Example #29
0
 def create(cls, pack_store, pack_data):
     """Write per-object index rows for *pack_data* (obsolete code path)."""
     # I think this is obsolete
     logging.error("UNUSED: gae_backend.py PackIndex.create()")
     for entry_sha, entry_offset, entry_crc32 in pack_data.iterentries():
         hex_sha = sha_to_hex(entry_sha)
         pack_store.sha1.append(hex_sha)
         # One datastore write per entry; inefficient, but this path is
         # deprecated so there is little point optimizing it.
         record = PackStoreIndex(packref=pack_store,
                                 sha=hex_sha,
                                 offset=entry_offset,
                                 crc32=entry_crc32)
         record.save()
     pack_store.checksum = pack_data.get_stored_checksum()
     pack_store.save()
     return cls(pack_store)
Example #30
0
def read_cache_entry(f, version: int) -> Tuple[str, IndexEntry]:
    """Read an entry from a cache file.

    Args:
      f: File-like object to read from
      version: index format version; 3+ permits extended flags, 4+ drops
        the per-entry padding
    Returns:
      tuple with: name, IndexEntry
    Raises:
      AssertionError: if an extended flag is set but version < 3
    """
    beginoffset = f.tell()
    ctime = read_cache_time(f)
    mtime = read_cache_time(f)
    (
        dev,
        ino,
        mode,
        uid,
        gid,
        size,
        sha,
        flags,
    ) = struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
    if flags & FLAG_EXTENDED:
        if version < 3:
            raise AssertionError('extended flag set in index with version < 3')
        (extended_flags, ) = struct.unpack(">H", f.read(2))
    else:
        extended_flags = 0
    # Low 12 bits of flags hold the name length.
    name = f.read((flags & 0x0FFF))
    # Padding: entries are aligned to 8-byte boundaries before version 4.
    if version < 4:
        real_size = (f.tell() - beginoffset + 8) & ~7
        f.read((beginoffset + real_size) - f.tell())
    return (name,
            IndexEntry(
                ctime,
                mtime,
                dev,
                ino,
                mode,
                uid,
                gid,
                size,
                sha_to_hex(sha),
                flags & ~0x0FFF,
                extended_flags,
            ))
Example #31
0
def read_cache_entry(f):
    """Read an entry from a cache file.

    :param f: File-like object to read from
    :return: tuple with: name, ctime, mtime, device, inode, mode, uid, gid,
        size, hex sha, and flags (with the name-length bits masked out)
    """
    beginoffset = f.tell()
    ctime = read_cache_time(f)
    mtime = read_cache_time(f)
    (dev, ino, mode, uid, gid, size, sha, flags, ) = \
        struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
    # Low 12 bits of flags hold the name length.
    name = f.read((flags & 0x0fff))
    # Padding: entries are aligned to 8-byte boundaries.
    real_size = ((f.tell() - beginoffset + 8) & ~7)
    f.read((beginoffset + real_size) - f.tell())
    return (name, ctime, mtime, dev, ino, mode, uid, gid, size,
            sha_to_hex(sha), flags & ~0x0fff)
Example #32
0
    def test_add_thin_pack(self):
        """Adding a thin pack resolves its REF_DELTA against stored objects.

        Wraps the assertions in try/finally so the pack file handles are
        closed even on failure (matching the other variants of this test).
        """
        o = DiskObjectStore(self.store_dir)
        blob = make_object(Blob, data='yummy data')
        o.add_object(blob)

        f = StringIO()
        entries = build_pack(f, [
          (REF_DELTA, (blob.id, 'more yummy data')),
          ], store=o)
        pack = o.add_thin_pack(f.read, None)
        try:
            packed_blob_sha = sha_to_hex(entries[0][3])
            pack.check_length_and_checksum()
            self.assertEqual(sorted([blob.id, packed_blob_sha]), list(pack))
            self.assertTrue(o.contains_packed(packed_blob_sha))
            self.assertTrue(o.contains_packed(blob.id))
            self.assertEqual((Blob.type_num, 'more yummy data'),
                             o.get_raw(packed_blob_sha))
        finally:
            # Close cached packs so file handles are not leaked.
            for p in o._pack_cache or []:
                p.close()
            pack.close()
Example #33
0
    def test_add_thin_pack(self):
        """Adding a thin pack resolves its REF_DELTA against stored objects.

        Wraps the assertions in try/finally so the pack file handles are
        closed even on failure (matching the other variants of this test).
        """
        o = DiskObjectStore(self.store_dir)
        blob = make_object(Blob, data='yummy data')
        o.add_object(blob)

        f = StringIO()
        entries = build_pack(f, [
            (REF_DELTA, (blob.id, 'more yummy data')),
        ],
                             store=o)
        pack = o.add_thin_pack(f.read, None)
        try:
            packed_blob_sha = sha_to_hex(entries[0][3])
            pack.check_length_and_checksum()
            self.assertEqual(sorted([blob.id, packed_blob_sha]), list(pack))
            self.assertTrue(o.contains_packed(packed_blob_sha))
            self.assertTrue(o.contains_packed(blob.id))
            self.assertEqual((Blob.type_num, 'more yummy data'),
                             o.get_raw(packed_blob_sha))
        finally:
            # Close cached packs so file handles are not leaked.
            for p in o._pack_cache or []:
                p.close()
            pack.close()
Example #34
0
def _check_expected_sha(expected_sha, object):
    """Check whether an object matches an expected SHA.

    :param expected_sha: None or expected SHA as either binary or as hex digest
    :param object: Object to verify
    :raise AssertionError: if the sha does not match, or if expected_sha
        is neither 40 (hex) nor 20 (binary) characters long.
    """
    # NOTE(review): the parameter name `object` shadows the builtin; renaming
    # it would break keyword callers, so it is left as-is.
    if expected_sha is None:
        return
    if len(expected_sha) == 40:
        # Hex digest comparison (encode because hexdigest() returns str).
        if expected_sha != object.sha().hexdigest().encode('ascii'):
            raise AssertionError("Invalid sha for %r: %s" %
                                 (object, expected_sha))
    elif len(expected_sha) == 20:
        # Binary digest comparison; report in hex for readability.
        if expected_sha != object.sha().digest():
            raise AssertionError("Invalid sha for %r: %s" %
                                 (object, sha_to_hex(expected_sha)))
    else:
        raise AssertionError("Unknown length %d for %r" %
                             (len(expected_sha), expected_sha))
    def get_raw(self, name):
        """Obtain the raw fulltext for an object.

        Args:
          name: sha for the object.
        Returns: tuple with numeric type and object contents.
        Raises:
          KeyError: if the object is not found anywhere; ZERO_SHA always
            raises since it never names a real object.
        """
        if name == ZERO_SHA:
            raise KeyError(name)
        if len(name) == 40:
            sha = hex_to_sha(name)
            hexsha = name
        elif len(name) == 20:
            sha = name
            hexsha = None
        else:
            raise AssertionError("Invalid object name %r" % (name,))
        # Cached packs first; pack lookup is by binary sha.
        for pack in self._iter_cached_packs():
            try:
                return pack.get_raw(sha)
            except (KeyError, PackFileDisappeared):
                pass
        # Loose objects are addressed by hex sha.
        if hexsha is None:
            hexsha = sha_to_hex(name)
        ret = self._get_loose_object(hexsha)
        if ret is not None:
            return ret.type_num, ret.as_raw_string()
        # Maybe something else has added a pack with the object
        # in the mean time?
        for pack in self._update_pack_cache():
            try:
                return pack.get_raw(sha)
            except KeyError:
                pass
        for alternate in self.alternates:
            try:
                return alternate.get_raw(hexsha)
            except KeyError:
                pass
        raise KeyError(hexsha)
Example #36
0
    def get_raw(self, name):
        """Obtain the raw fulltext for an object.

        :param name: sha for the object (40-char hex or 20-byte binary).
        :return: tuple with numeric type and object contents.
        :raise KeyError: if the object is not found anywhere; ZERO_SHA
            always raises since it never names a real object.
        """
        if name == ZERO_SHA:
            raise KeyError(name)
        if len(name) == 40:
            sha = hex_to_sha(name)
            hexsha = name
        elif len(name) == 20:
            sha = name
            hexsha = None
        else:
            # Tuple-wrap so %-formatting is safe even if name is a tuple
            # (consistent with the other get_raw implementation).
            raise AssertionError("Invalid object name %r" % (name,))
        # Cached packs first; pack lookup is by binary sha.
        for pack in self._iter_cached_packs():
            try:
                return pack.get_raw(sha)
            except (KeyError, PackFileDisappeared):
                pass
        # Loose objects are addressed by hex sha.
        if hexsha is None:
            hexsha = sha_to_hex(name)
        ret = self._get_loose_object(hexsha)
        if ret is not None:
            return ret.type_num, ret.as_raw_string()
        # Maybe something else has added a pack with the object
        # in the mean time?
        for pack in self._update_pack_cache():
            try:
                return pack.get_raw(sha)
            except KeyError:
                pass
        for alternate in self.alternates:
            try:
                return alternate.get_raw(hexsha)
            except KeyError:
                pass
        raise KeyError(hexsha)
Example #37
0
    def test_add_thin_pack(self):
        """Adding a thin pack resolves its REF_DELTA against stored objects.

        The store already holds the delta base, so add_thin_pack must
        complete the pack and expose both objects as packed.
        """
        o = DiskObjectStore(self.store_dir)
        blob = make_object(Blob, data='yummy data')
        o.add_object(blob)

        f = StringIO()
        entries = build_pack(f, [
          (REF_DELTA, (blob.id, 'more yummy data')),
          ], store=o)
        pack = o.add_thin_pack(f.read, None)
        try:
            # entries[0][3] is fed to sha_to_hex, i.e. the binary sha of
            # the reconstructed delta object.
            packed_blob_sha = sha_to_hex(entries[0][3])
            pack.check_length_and_checksum()
            self.assertEqual(sorted([blob.id, packed_blob_sha]), list(pack))
            self.assertTrue(o.contains_packed(packed_blob_sha))
            self.assertTrue(o.contains_packed(blob.id))
            self.assertEqual((Blob.type_num, 'more yummy data'),
                             o.get_raw(packed_blob_sha))
        finally:
            # FIXME: DiskObjectStore should have close() which do the following:
            for p in o._pack_cache or []:
                p.close()

            pack.close()
Example #38
0
 def test_reverse(self):
     """sha_to_hex converts a binary sha to its hex representation."""
     # assertEqual: assertEquals is a deprecated alias.
     self.assertEqual("abcd" * 10, sha_to_hex("\xab\xcd" * 10))
Example #39
0
class PackData(object):
    """The data contained in a packfile.

    Pack files can be accessed both sequentially for exploding a pack, and
    directly with the help of an index to retrieve a specific object.

    The objects within are either complete or a delta against another.

    The header is variable length. If the MSB of each byte is set then it
    indicates that the subsequent byte is still part of the header.
    For the first byte the next MS bits are the type, which tells you the type
    of object, and whether it is a delta. The LS byte is the lowest bits of the
    size. For each subsequent byte the LS 7 bits are the next MS bits of the
    size, i.e. the last byte of the header contains the MS bits of the size.

    For the complete objects the data is stored as zlib deflated data.
    The size in the header is the uncompressed object size, so to uncompress
    you need to just keep feeding data to zlib until you get an object back,
    or it errors on bad data. This is done here by just giving the complete
    buffer from the start of the deflated object on. This is bad, but until I
    get mmap sorted out it will have to do.

    Currently there are no integrity checks done. Also no attempt is made to try
    and detect the delta case, or a request for an object at the wrong position.
    It will all just throw a zlib or KeyError.
    """
    def __init__(self, filename):
        """Create a PackData object that represents the pack in the given filename.

        The file must exist and stay readable until the object is disposed of. It
        must also stay the same size. It will be mapped whenever needed.

        Currently there is a restriction on the size of the pack as the python
        mmap implementation is flawed.

        :param filename: Path to an existing pack file on disk.
        """
        self._filename = filename
        # NOTE(review): `assert` disappears under `python -O`; kept as-is to
        # preserve the existing validation behavior.
        assert os.path.exists(filename), "%s is not a packfile" % filename
        self._size = os.path.getsize(filename)
        self._header_size = 12
        assert self._size >= self._header_size, "%s is too small for a packfile (%d < %d)" % (
            filename, self._size, self._header_size)
        self._file = open(self._filename, 'rb')
        self._read_header()
        # Cache of resolved (type, text) pairs keyed by pack offset, bounded
        # by the total byte size of the cached objects (20 MB).
        self._offset_cache = LRUSizeCache(1024 * 1024 * 20,
                                          compute_size=_compute_object_size)

    def close(self):
        """Close the underlying file handle for this pack."""
        self._file.close()

    def _read_header(self):
        # Read the pack header (version + object count) from the front of
        # the file, and the 20-byte SHA1 checksum stored at its very end.
        (version, self._num_objects) = read_pack_header(self._file)
        self._file.seek(self._size - 20)
        self._stored_checksum = self._file.read(20)

    def __len__(self):
        """Returns the number of objects in this pack."""
        return self._num_objects

    def calculate_checksum(self):
        """Calculate the checksum for this pack.

        :return: 20-byte binary SHA1 digest
        """
        # Hash everything except the trailing 20-byte stored checksum.
        map, map_offset = simple_mmap(self._file, 0, self._size - 20)
        try:
            return make_sha(map[map_offset:self._size - 20]).digest()
        finally:
            map.close()

    def resolve_object(self, offset, type, obj, get_ref, get_offset=None):
        """Resolve an object, possibly resolving deltas when necessary.

        :param offset: Offset of the object within this pack.
        :param type: Pack object type number (6 = offset delta, 7 = ref
            delta; anything else is already a complete object).
        :param obj: Raw payload; for deltas this is a (base, delta) tuple.
        :param get_ref: Callback mapping a 20-byte binary SHA to
            (type, raw text); used to resolve ref deltas (thin packs).
        :param get_offset: Optional callback mapping a pack offset to
            (type, obj); defaults to self.get_object_at.
        :return: Tuple with object type and contents.
        """
        if type not in (6, 7):  # Not a delta
            return type, obj

        if get_offset is None:
            get_offset = self.get_object_at

        if type == 6:  # offset delta
            (delta_offset, delta) = obj
            assert isinstance(delta_offset, int)
            assert isinstance(delta, str)
            # The stored value is a backwards displacement from this object.
            base_offset = offset - delta_offset
            type, base_obj = get_offset(base_offset)
            assert isinstance(type, int)
        elif type == 7:  # ref delta
            (basename, delta) = obj
            assert isinstance(basename, str) and len(basename) == 20
            assert isinstance(delta, str)
            type, base_obj = get_ref(basename)
            assert isinstance(type, int)
            # Can't be a ofs delta, as we wouldn't know the base offset
            assert type != 6
            base_offset = None
        # The base may itself be a delta; recurse until a full object is
        # reached, then apply this delta on top of the resolved base text.
        type, base_text = self.resolve_object(base_offset, type, base_obj,
                                              get_ref)
        if base_offset is not None:
            self._offset_cache[base_offset] = type, base_text
        ret = (type, apply_delta(base_text, delta))
        return ret

    def iterobjects(self, progress=None):
        """Iterate over the raw objects in this pack in file order.

        :param progress: Optional callback called with (current, total)
            object counts after each object.
        :return: Iterator yielding (offset, type, obj, crc32) tuples.
        """
        class ObjectIterator(object):
            # Walks the pack sequentially over a single mmap of the file;
            # the map is closed when the iterator is garbage-collected.
            def __init__(self, pack):
                self.i = 0
                self.offset = pack._header_size
                self.num = len(pack)
                self.map, _ = simple_mmap(pack._file, 0, pack._size)

            def __del__(self):
                self.map.close()

            def __iter__(self):
                return self

            def __len__(self):
                return self.num

            def next(self):
                if self.i == self.num:
                    raise StopIteration
                (type, obj, total_size) = unpack_object(self.map, self.offset)
                # CRC32 over the raw entry bytes, masked to an unsigned
                # 32-bit value (zlib.crc32 may return negatives on Python 2).
                crc32 = zlib.crc32(self.map[self.offset:self.offset +
                                            total_size]) & 0xffffffff
                ret = (self.offset, type, obj, crc32)
                self.offset += total_size
                if progress:
                    progress(self.i, self.num)
                self.i += 1
                return ret

        return ObjectIterator(self)

    def iterentries(self, ext_resolve_ref=None, progress=None):
        """Yield entries summarizing the contents of this pack.

        :param ext_resolve_ref: Optional function to resolve base
            objects (in case this is a thin pack)
        :param progress: Progress function, called with current and
            total object count.

        This will yield tuples with (sha, offset, crc32)
        """
        # Binary SHA -> offset for every object resolved so far.
        found = {}
        # Ref deltas whose base has not been seen yet, keyed by base SHA.
        postponed = defaultdict(list)

        class Postpone(Exception):
            """Raised to postpone delta resolving."""

        def get_ref_text(sha):
            # Resolve a ref-delta base: first from objects already seen in
            # this pack, then via the external resolver (thin-pack case);
            # otherwise postpone until the base shows up later in the pack.
            assert len(sha) == 20
            if sha in found:
                return self.get_object_at(found[sha])
            if ext_resolve_ref:
                try:
                    return ext_resolve_ref(sha)
                except KeyError:
                    pass
            raise Postpone, (sha, )

        # `extra` is mutated while iterating: once a base is resolved, its
        # postponed deltas are appended and re-processed via the chain().
        extra = []
        todo = chain(self.iterobjects(progress=progress), extra)
        for (offset, type, obj, crc32) in todo:
            assert isinstance(offset, int)
            assert isinstance(type, int)
            assert isinstance(obj, tuple) or isinstance(obj, str)
            try:
                type, obj = self.resolve_object(offset, type, obj,
                                                get_ref_text)
            except Postpone, (sha, ):
                postponed[sha].append((offset, type, obj))
            else:
                shafile = ShaFile.from_raw_string(type, obj)
                sha = shafile.sha().digest()
                found[sha] = offset
                yield sha, offset, crc32
                extra.extend(postponed.get(sha, []))
        if postponed:
            # Some bases never appeared, in the pack or via ext_resolve_ref.
            raise KeyError([sha_to_hex(h) for h in postponed.keys()])
Example #40
0
 def check_sha(self):
     """Verify the trailing 20-byte checksum against the running digest.

     :raise ChecksumMismatch: if the stored checksum differs.
     """
     expected = self.sha1.digest()
     stored = self.f.read(20)
     if stored != expected:
         raise ChecksumMismatch(self.sha1.hexdigest(), sha_to_hex(stored))
Example #41
0
 def sha1s(self):
     """List the SHA1s.

     Yields the hex SHA1 for every key prefixed with "git" plus a NUL byte.
     """
     marker = b"git\0"
     for entry in self._keys():
         if entry.startswith(marker):
             yield sha_to_hex(entry[4:])
Example #42
0
 def test_get_stored_checksum(self):
     """The index and pack checksums hex-encode to the known values."""
     idx = self.get_pack_index(pack1_sha)
     self.assertEquals("f2848e2ad16f329ae1c92e3b95e91888daa5bd01",
                       sha_to_hex(idx.get_stored_checksum()))
     self.assertEquals("721980e866af9a5f93ad674144e1459b8ba3e7b7",
                       sha_to_hex(idx.get_pack_checksum()))
Example #43
0
 def test_reverse(self):
     """Binary SHA bytes hex-encode byte-for-byte to lowercase hex."""
     expected = b'abcd' * 10
     self.assertEqual(expected, sha_to_hex(b'\xab\xcd' * 10))
Example #44
0
 def lookup_blob_id(self, fileid, revision):
     """Return the hex SHA of the blob cached for (fileid, revision)."""
     key = b"\0".join((b"blob", fileid, revision))
     return sha_to_hex(self.db[key])
Example #45
0
 def test_reverse(self):
     """A 20-byte binary input maps to its 40-char lowercase hex form."""
     digest = b'\xab\xcd' * 10
     self.assertEqual(b'abcd' * 10, sha_to_hex(digest))
Example #46
0
 def check_sha(self):
     """Read the stored trailer checksum and compare it to our digest.

     :raise ChecksumMismatch: if the two checksums differ.
     """
     trailer = self.f.read(20)
     if self.sha1.digest() != trailer:
         raise ChecksumMismatch(self.sha1.hexdigest(), sha_to_hex(trailer))
Example #47
0
 def lookup_commit(self, revid):
     """Return the hex commit SHA cached for the given revision id.

     :raise KeyError: if no cache entry exists for revid.
     """
     key = b"commit\0" + revid
     try:
         record = self.db[key]
     except KeyError:
         raise KeyError("No cache entry for %r" % revid)
     return sha_to_hex(record[:20])
Example #48
0
 def test_reverse(self):
     """Hex encoding of a repeated byte pair yields the repeated hex pair."""
     binary = "\xab\xcd" * 10
     self.assertEqual("abcd" * 10, sha_to_hex(binary))