Ejemplo n.º 1
0
    def move_in_thin_pack(self, path):
        """Move a specific file containing a thin pack into the pack directory.

        A thin pack may refer to base objects stored outside of it; this
        method rewrites it into a complete, self-contained pack before
        installing it under its canonical name.

        :note: The file should be on the same file system as the
            packs directory.

        :param path: Path to the pack file.
        """
        data = PackData(path)

        # Write an index for the thin pack so it can be accessed randomly
        # while being rewritten below (do we really need this?).
        # os.urandom(20) replaces the private urllib2.randombytes(20)
        # helper; both just produce 20 random bytes for a unique temp name.
        temppath = os.path.join(self.pack_dir,
            sha_to_hex(os.urandom(20)) + ".tempidx")
        data.create_index_v2(temppath, self.get_raw)
        p = Pack.from_objects(data, load_pack_index(temppath))

        # Write a full pack version, resolving external bases via get_raw.
        temppath = os.path.join(self.pack_dir,
            sha_to_hex(os.urandom(20)) + ".temppack")
        write_pack(temppath, ((o, None) for o in p.iterobjects(self.get_raw)),
                len(p))

        # Rename the pack and its index into place as pack-<sha>.
        pack_sha = load_pack_index(temppath + ".idx").objects_sha1()
        newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
        os.rename(temppath + ".pack", newbasename + ".pack")
        os.rename(temppath + ".idx", newbasename + ".idx")
        self._add_known_pack(newbasename)
Ejemplo n.º 2
0
 def data(self):
     """The pack data object being used.

     Loaded lazily on first access; before returning, the index and the
     data file are cross-checked for object count and pack checksum.
     """
     if self._data is not None:
         return self._data
     self._data = PackData(self._data_path)
     # Index and data file must describe the same number of objects.
     assert len(self.index) == len(self._data)
     idx_checksum = self.index.get_pack_checksum()
     data_checksum = self._data.get_stored_checksum()
     if idx_checksum != data_checksum:
         raise ChecksumMismatch(sha_to_hex(idx_checksum),
                                sha_to_hex(data_checksum))
     return self._data
Ejemplo n.º 3
0
 def get_raw(self, name):
     """Obtain the raw text for an object.

     :param name: sha for the object (20-byte binary or 40-char hex).
     :return: tuple with object type and object contents.
     """
     # Normalise to the binary sha used for pack lookups, remembering the
     # hex form (when we already have it) for the loose-object fallback.
     if len(name) == 40:
         binsha = hex_to_sha(name)
         hexsha = name
     elif len(name) == 20:
         binsha = name
         hexsha = None
     else:
         raise AssertionError
     for pack in self.packs:
         try:
             return pack.get_raw(binsha)
         except KeyError:
             continue
     # Not found in any pack; fall back to loose object storage.
     if hexsha is None:
         hexsha = sha_to_hex(name)
     shafile = self._get_shafile(hexsha)
     if shafile is None:
         raise KeyError(hexsha)
     return shafile.type, shafile.as_raw_string()
Ejemplo n.º 4
0
def read_cache_entry(f):
    """Read an entry from a cache file.

    :param f: File-like object to read from
    :return: tuple with: name, ctime, mtime, inode, device, mode, uid,
        gid, size, hex sha, flags
    """
    beginoffset = f.tell()
    ctime = read_cache_time(f)
    mtime = read_cache_time(f)
    (ino, dev, mode, uid, gid, size, sha, flags) = \
        struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
    # NUL-terminated name; collect characters and join once instead of
    # repeated string concatenation (which is quadratic).
    chars = []
    char = f.read(1)
    while char != "\0":
        chars.append(char)
        char = f.read(1)
    name = "".join(chars)
    # Entries are padded so that the next one starts on an 8-byte boundary.
    real_size = (f.tell() - beginoffset + 7) & ~7
    f.seek(beginoffset + real_size)
    return (name, ctime, mtime, ino, dev, mode, uid, gid, size,
            sha_to_hex(sha), flags)
Ejemplo n.º 5
0
class PackData(object):
  """The data contained in a packfile.

  Pack files can be accessed both sequentially for exploding a pack, and
  directly with the help of an index to retrieve a specific object.

  The objects within are either complete or a delta against another.

  The header is variable length. If the MSB of each byte is set then it
  indicates that the subsequent byte is still part of the header.
  For the first byte the next MS bits are the type, which tells you the type
  of object, and whether it is a delta. The LS byte is the lowest bits of the
  size. For each subsequent byte the LS 7 bits are the next MS bits of the
  size, i.e. the last byte of the header contains the MS bits of the size.

  For the complete objects the data is stored as zlib deflated data.
  The size in the header is the uncompressed object size, so to uncompress
  you need to just keep feeding data to zlib until you get an object back,
  or it errors on bad data. This is done here by just giving the complete
  buffer from the start of the deflated object on. This is bad, but until I
  get mmap sorted out it will have to do.

  Currently there are no integrity checks done. Also no attempt is made to try
  and detect the delta case, or a request for an object at the wrong position.
  It will all just throw a zlib or KeyError.
  """

  def __init__(self, filename):
    """Create a PackData object that represents the pack in the given filename.

    The file must exist and stay readable until the object is disposed of. It
    must also stay the same size. It will be mapped whenever needed.

    Currently there is a restriction on the size of the pack as the python
    mmap implementation is flawed.

    :param filename: Path to an existing pack file on disk.
    """
    self._filename = filename
    # NOTE(review): assert is stripped under python -O, so these existence
    # and size checks are not reliable input validation.
    assert os.path.exists(filename), "%s is not a packfile" % filename
    self._size = os.path.getsize(filename)
    # Fixed size of the pack file header (read by _read_header).
    self._header_size = 12
    assert self._size >= self._header_size, "%s is too small for a packfile" % filename
    self._read_header()

  def _read_header(self):
    """Populate the object count and stored checksum from the pack file."""
    pack_file = open(self._filename, 'rb')
    try:
        version, self._num_objects = read_pack_header(pack_file)
        # The trailing 20 bytes of the file hold the stored checksum.
        pack_file.seek(self._size - 20)
        (self._stored_checksum,) = read_pack_tail(pack_file)
    finally:
        pack_file.close()

  def __len__(self):
      """Return the number of objects stored in this pack."""
      return self._num_objects

  def calculate_checksum(self):
    """Compute the SHA-1 checksum over the contents of this pack.

    The trailing 20 bytes (the stored checksum itself, see _read_header)
    are excluded from the digest.

    :return: 20-byte binary SHA-1 digest.
    """
    f = open(self._filename, 'rb')
    try:
        # Renamed from 'map' to avoid shadowing the builtin.
        contents = simple_mmap(f, 0, self._size)
        return hashlib.sha1(contents[:-20]).digest()
    finally:
        f.close()

  def iterobjects(self):
    """Iterate over the objects in this pack, in file order.

    :return: generator of (offset, type, raw object) tuples.
    """
    offset = self._header_size
    f = open(self._filename, 'rb')
    try:
        for i in range(len(self)):
            # Map from the current object to the end of the file and let
            # unpack_object report how far this object extends.
            chunk = simple_mmap(f, offset, self._size - offset)
            (obj_type, obj, total_size) = unpack_object(chunk)
            yield offset, obj_type, obj
            offset += total_size
    finally:
        # Close the file even if the generator is abandoned early or an
        # error is raised mid-iteration; the original leaked the handle
        # in both of those cases.
        f.close()

  def iterentries(self, ext_resolve_ref=None):
    """Yield (binary sha, offset, crc32) for every object in this pack.

    Delta objects whose base is not yet known are postponed and retried
    once their base object has been resolved.

    :param ext_resolve_ref: optional callable that maps a sha to raw
        object text, used to resolve bases stored outside this pack
        (thin packs); may raise KeyError.
    :raise KeyError: if deltas remain unresolved after all objects have
        been processed.
    """
    found = {}
    at = {}
    postponed = defaultdict(list)
    class Postpone(Exception):
        """Raised to postpone delta resolving."""

    def get_ref_text(sha):
        # Prefer objects already resolved from this pack; otherwise try
        # the external resolver, if one was supplied.
        if sha in found:
            return found[sha]
        if ext_resolve_ref:
            try:
                return ext_resolve_ref(sha)
            except KeyError:
                pass
        raise Postpone, (sha, )
    todo = list(self.iterobjects())
    while todo:
      (offset, type, obj) = todo.pop(0)
      at[offset] = (type, obj)
      assert isinstance(offset, int)
      assert isinstance(type, int)
      assert isinstance(obj, tuple) or isinstance(obj, str)
      try:
        type, obj = resolve_object(offset, type, obj, get_ref_text,
            at.__getitem__)
      except Postpone, (sha, ):
        # Base object not available yet: park this entry until an object
        # with that sha has been resolved.
        postponed[sha].append((offset, type, obj))
      else:
        shafile = ShaFile.from_raw_string(type, obj)
        sha = shafile.sha().digest()
        found[sha] = (type, obj)
        yield sha, offset, shafile.crc32()
        # Re-queue any deltas that were waiting on this object.
        todo += postponed.get(sha, [])
    if postponed:
        raise KeyError([sha_to_hex(h) for h in postponed.keys()])