	def test_pack_writing(self):
		# see how fast we can write a pack from object streams.
		# This will not be fast, as we also spend time decompressing the streams
		ostream = CountedNullStream()
		pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
		
		ni = 5000
		count = 0
		total_size = 0
		st = time()
		objs = list()
		for sha in pdb.sha_iter():
			count += 1
			objs.append(pdb.stream(sha))
			if count == ni:
				break
		#END gather objects for pack-writing
		elapsed = time() - st
		print >> sys.stderr, "PDB Streaming: Got %i streams by sha in in %f s ( %f streams/s )" % (ni, elapsed, ni / elapsed)
		
		st = time()
		PackEntity.write_pack(objs, ostream.write)
		elapsed = time() - st
		total_kb = ostream.bytes_written() / 1000
		print >> sys.stderr, "PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % (total_kb, elapsed, total_kb/elapsed)
Example #2
    def test_pack_writing(self):
        # see how fast we can write a pack from object streams.
        # This will not be fast, as we also spend time decompressing the streams
        ostream = CountedNullStream()
        pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))

        ni = 5000
        count = 0
        total_size = 0
        st = time()
        for sha in pdb.sha_iter():
            count += 1
            pdb.stream(sha)
            if count == ni:
                break
        #END gather objects for pack-writing
        elapsed = time() - st
        print >> sys.stderr, "PDB Streaming: Got %i streams by sha in in %f s ( %f streams/s )" % (
            ni, elapsed, ni / elapsed)

        st = time()
        PackEntity.write_pack((pdb.stream(sha) for sha in pdb.sha_iter()),
                              ostream.write,
                              object_count=ni)
        elapsed = time() - st
        total_kb = ostream.bytes_written() / 1000
        print >> sys.stderr, "PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % (
            total_kb, elapsed, total_kb / elapsed)
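
This variant feeds write_pack a generator instead of a list, so it passes object_count explicitly. A minimal sketch contrasting the two call shapes, using the names set up in the test above; that write_pack can derive the count from a sized sequence on its own is an assumption based on the first example.

# sized sequence: write_pack is called without an explicit count (assumption: it uses len())
PackEntity.write_pack(objs, ostream.write)

# lazy generator: the number of objects must be supplied up front
PackEntity.write_pack((pdb.stream(sha) for sha in pdb.sha_iter()),
                      ostream.write,
                      object_count=ni)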
Example #3
    def update_cache(self, force=False):
        """
        Update our cache with the actually existing packs on disk. Add new ones
        and remove deleted ones; unchanged ones are kept.

        :param force: If True, the cache will be updated even though the directory
            does not appear to have changed according to its modification timestamp.
        :return: True if the packs have been updated so there is new information,
            False if there was no change to the pack database"""
        stat = os.stat(self.root_path())
        if not force and stat.st_mtime <= self._st_mtime:
            return False
        # END abort early on no change
        self._st_mtime = stat.st_mtime

        # packs are supposed to be prefixed with pack- by git-convention
        # get all pack files, figure out what changed
        pack_files = set(
            glob.glob(os.path.join(self.root_path(), "pack-*.pack")))
        our_pack_files = {item[1].pack().path() for item in self._entities}

        # new packs
        for pack_file in (pack_files - our_pack_files):
            # init the hit-counter/priority with the size, a good measure for
            # hit-probability. It's implemented so that only 12 bytes will be read
            entity = PackEntity(pack_file)
            self._entities.append(
                [entity.pack().size(), entity,
                 entity.index().sha_to_index])
        # END for each new packfile

        # removed packs
        for pack_file in (our_pack_files - pack_files):
            del_index = -1
            for i, item in enumerate(self._entities):
                if item[1].pack().path() == pack_file:
                    del_index = i
                    break
                # END found index
            # END for each entity
            assert del_index != -1
            del (self._entities[del_index])
        # END for each removed pack

        # reinitialize priorities
        self._sort_entities()
        return True
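
update_cache() ends by calling self._sort_entities(), whose body is not shown on this page. A plausible sketch, assuming the priority stored at item[0] (initialised with the pack size above) is meant to put the most promising packs first during sha lookups:

    def _sort_entities(self):
        # hypothetical sketch: consult high-priority (large, frequently hit) packs
        # first; item[0] is the hit-counter/priority set in update_cache()
        self._entities.sort(key=lambda item: item[0], reverse=True)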
Example #4
 def update_cache(self, force=False):
     """
     Update our cache with the actually existing packs on disk. Add new ones
     and remove deleted ones; unchanged ones are kept.
     
     :param force: If True, the cache will be updated even though the directory
         does not appear to have changed according to its modification timestamp.
     :return: True if the packs have been updated so there is new information, 
         False if there was no change to the pack database"""
     stat = os.stat(self.root_path())
     if not force and stat.st_mtime <= self._st_mtime:
         return False
     # END abort early on no change
     self._st_mtime = stat.st_mtime
     
     # packs are supposed to be prefixed with pack- by git-convention
     # get all pack files, figure out what changed
     pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack")))
     our_pack_files = set(item[1].pack().path() for item in self._entities)
     
     # new packs
     for pack_file in (pack_files - our_pack_files):
         # init the hit-counter/priority with the size, a good measure for
         # hit-probability. It's implemented so that only 12 bytes will be read
         entity = PackEntity(pack_file)
         self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index])
     # END for each new packfile
     
     # removed packs
     for pack_file in (our_pack_files - pack_files):
         del_index = -1
         for i, item in enumerate(self._entities):
             if item[1].pack().path() == pack_file:
                 del_index = i
                 break
             # END found index
         # END for each entity
         assert del_index != -1
         del(self._entities[del_index])
     # END for each removed pack
     
     # reinitialize priorities
     self._sort_entities()
     return True
Example #5
	def test_pack_entity(self):
		for packinfo, indexinfo in (	(self.packfile_v2_1, self.packindexfile_v1), 
										(self.packfile_v2_2, self.packindexfile_v2),
										(self.packfile_v2_3_ascii, self.packindexfile_v2_3_ascii)):
			packfile, version, size = packinfo
			indexfile, version, size = indexinfo
			entity = PackEntity(packfile)
			assert entity.pack().path() == packfile
			assert entity.index().path() == indexfile
			
			count = 0
			for info, stream in izip(entity.info_iter(), entity.stream_iter()):
				count += 1
				assert info.binsha == stream.binsha
				assert len(info.binsha) == 20
				assert info.type_id == stream.type_id
				assert info.size == stream.size
				
				# we return fully resolved items, which is implied by the sha-centric access
				assert info.type_id not in delta_types
				
				# try all calls
				assert len(entity.collect_streams(info.binsha))
				oinfo = entity.info(info.binsha)
				assert isinstance(oinfo, OInfo)
				assert oinfo.binsha is not None
				ostream = entity.stream(info.binsha)
				assert isinstance(ostream, OStream)
				assert ostream.binsha is not None
				
				# verify the stream
				try:
					assert entity.is_valid_stream(info.binsha, use_crc=True)
				except UnsupportedOperation:
					pass
				# END ignore version issues
				assert entity.is_valid_stream(info.binsha, use_crc=False)
			# END for each info, stream tuple
			assert count == size
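
A condensed usage sketch of the sha-centric accessors exercised above; the pack path is hypothetical, and the file-like read() on the returned stream is an assumption rather than something shown in the test.

entity = PackEntity("/path/to/pack-deadbeef.pack")    # hypothetical pack path
binsha = next(entity.info_iter()).binsha              # pick any object in the pack
info = entity.info(binsha)                            # OInfo: type_id, size, binsha
ostream = entity.stream(binsha)                       # OStream with the object data
data = ostream.read()                                 # assumed file-like read()
assert len(data) == info.size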
Example #6
    def test_pack_entity(self):
        for packinfo, indexinfo in ((self.packfile_v2_1,
                                     self.packindexfile_v1),
                                    (self.packfile_v2_2,
                                     self.packindexfile_v2),
                                    (self.packfile_v2_3_ascii,
                                     self.packindexfile_v2_3_ascii)):
            packfile, version, size = packinfo
            indexfile, version, size = indexinfo
            entity = PackEntity(packfile)
            assert entity.pack().path() == packfile
            assert entity.index().path() == indexfile

            count = 0
            for info, stream in izip(entity.info_iter(), entity.stream_iter()):
                count += 1
                assert info.binsha == stream.binsha
                assert len(info.binsha) == 20
                assert info.type_id == stream.type_id
                assert info.size == stream.size

                # we return fully resolved items, which is implied by the sha-centric access
                assert info.type_id not in delta_types

                # try all calls
                assert len(entity.collect_streams(info.binsha))
                oinfo = entity.info(info.binsha)
                assert isinstance(oinfo, OInfo)
                assert oinfo.binsha is not None
                ostream = entity.stream(info.binsha)
                assert isinstance(ostream, OStream)
                assert ostream.binsha is not None

                # verify the stream
                try:
                    assert entity.is_valid_stream(info.binsha, use_crc=True)
                except UnsupportedOperation:
                    pass
                # END ignore version issues
                assert entity.is_valid_stream(info.binsha, use_crc=False)
            # END for each info, stream tuple
            assert count == size
Example #7
	def test_pack_entity(self, rw_dir):
		pack_objs = list()
		for packinfo, indexinfo in (	(self.packfile_v2_1, self.packindexfile_v1), 
										(self.packfile_v2_2, self.packindexfile_v2),
										(self.packfile_v2_3_ascii, self.packindexfile_v2_3_ascii)):
			packfile, version, size = packinfo
			indexfile, version, size = indexinfo
			entity = PackEntity(packfile)
			assert entity.pack().path() == packfile
			assert entity.index().path() == indexfile
			pack_objs.extend(entity.stream_iter())
			
			count = 0
			for info, stream in izip(entity.info_iter(), entity.stream_iter()):
				count += 1
				assert info.binsha == stream.binsha
				assert len(info.binsha) == 20
				assert info.type_id == stream.type_id
				assert info.size == stream.size
				
				# we return fully resolved items, which is implied by the sha-centric access
				assert info.type_id not in delta_types
				
				# try all calls
				assert len(entity.collect_streams(info.binsha))
				oinfo = entity.info(info.binsha)
				assert isinstance(oinfo, OInfo)
				assert oinfo.binsha is not None
				ostream = entity.stream(info.binsha)
				assert isinstance(ostream, OStream)
				assert ostream.binsha is not None
				
				# verify the stream
				try:
					assert entity.is_valid_stream(info.binsha, use_crc=True)
				except UnsupportedOperation:
					pass
				# END ignore version issues
				assert entity.is_valid_stream(info.binsha, use_crc=False)
			# END for each info, stream tuple
			assert count == size
			
		# END for each entity
		
		# pack writing - write all packs into one
		# index path can be None
		pack_path = tempfile.mktemp('', "pack", rw_dir)
		index_path = tempfile.mktemp('', 'index', rw_dir)
		iteration = 0
		def rewind_streams():
			for obj in pack_objs: 
				obj.stream.seek(0)
		#END utility
		for ppath, ipath, num_obj in zip((pack_path, )*2, (index_path, None), (len(pack_objs), None)):
			pfile = open(ppath, 'wb')
			iwrite = None
			if ipath:
				ifile = open(ipath, 'wb')
				iwrite = ifile.write
			#END handle ip
			
			# make sure we rewind the streams ... we work on the same objects over and over again
			if iteration > 0: 
				rewind_streams()
			#END rewind streams
			iteration += 1
			
			pack_sha, index_sha = PackEntity.write_pack(pack_objs, pfile.write, iwrite, object_count=num_obj)
			pfile.close()
			assert os.path.getsize(ppath) > 100
			
			# verify pack
			pf = PackFile(ppath)
			assert pf.size() == len(pack_objs)
			assert pf.version() == PackFile.pack_version_default
			assert pf.checksum() == pack_sha
			
			# verify index
			if ipath is not None:
				ifile.close()
				assert os.path.getsize(ipath) > 100
				idx = PackIndexFile(ipath)
				assert idx.version() == PackIndexFile.index_version_default
				assert idx.packfile_checksum() == pack_sha
				assert idx.indexfile_checksum() == index_sha
				assert idx.size() == len(pack_objs)
			#END verify files exist
		#END for each packpath, indexpath pair
		
		# verify the packs thoroughly
		rewind_streams()
		entity = PackEntity.create(pack_objs, rw_dir)
		count = 0
		for info in entity.info_iter():
			count += 1
			for use_crc in range(2):
				assert entity.is_valid_stream(info.binsha, use_crc)
			# END for each crc mode
		#END for each info
		assert count == len(pack_objs)
Example #8
    def test_pack_entity(self, rw_dir):
        pack_objs = list()
        for packinfo, indexinfo in ((self.packfile_v2_1,
                                     self.packindexfile_v1),
                                    (self.packfile_v2_2,
                                     self.packindexfile_v2),
                                    (self.packfile_v2_3_ascii,
                                     self.packindexfile_v2_3_ascii)):
            packfile, version, size = packinfo
            indexfile, version, size = indexinfo
            entity = PackEntity(packfile)
            assert entity.pack().path() == packfile
            assert entity.index().path() == indexfile
            pack_objs.extend(entity.stream_iter())

            count = 0
            for info, stream in zip(entity.info_iter(), entity.stream_iter()):
                count += 1
                assert info.binsha == stream.binsha
                assert len(info.binsha) == 20
                assert info.type_id == stream.type_id
                assert info.size == stream.size

                # we return fully resolved items, which is implied by the sha-centric access
                assert info.type_id not in delta_types

                # try all calls
                assert len(entity.collect_streams(info.binsha))
                oinfo = entity.info(info.binsha)
                assert isinstance(oinfo, OInfo)
                assert oinfo.binsha is not None
                ostream = entity.stream(info.binsha)
                assert isinstance(ostream, OStream)
                assert ostream.binsha is not None

                # verify the stream
                try:
                    assert entity.is_valid_stream(info.binsha, use_crc=True)
                except UnsupportedOperation:
                    pass
                # END ignore version issues
                assert entity.is_valid_stream(info.binsha, use_crc=False)
            # END for each info, stream tuple
            assert count == size

        # END for each entity

        # pack writing - write all packs into one
        # index path can be None
        pack_path1 = tempfile.mktemp('', "pack1", rw_dir)
        pack_path2 = tempfile.mktemp('', "pack2", rw_dir)
        index_path = tempfile.mktemp('', 'index', rw_dir)
        iteration = 0

        def rewind_streams():
            for obj in pack_objs:
                obj.stream.seek(0)

        # END utility
        for ppath, ipath, num_obj in zip((pack_path1, pack_path2),
                                         (index_path, None),
                                         (len(pack_objs), None)):
            iwrite = None
            if ipath:
                ifile = open(ipath, 'wb')
                iwrite = ifile.write
            # END handle ip

            # make sure we rewind the streams ... we work on the same objects over and over again
            if iteration > 0:
                rewind_streams()
            # END rewind streams
            iteration += 1

            with open(ppath, 'wb') as pfile:
                pack_sha, index_sha = PackEntity.write_pack(
                    pack_objs, pfile.write, iwrite, object_count=num_obj)
            assert os.path.getsize(ppath) > 100

            # verify pack
            pf = PackFile(ppath)
            assert pf.size() == len(pack_objs)
            assert pf.version() == PackFile.pack_version_default
            assert pf.checksum() == pack_sha
            pf.close()

            # verify index
            if ipath is not None:
                ifile.close()
                assert os.path.getsize(ipath) > 100
                idx = PackIndexFile(ipath)
                assert idx.version() == PackIndexFile.index_version_default
                assert idx.packfile_checksum() == pack_sha
                assert idx.indexfile_checksum() == index_sha
                assert idx.size() == len(pack_objs)
                idx.close()
            # END verify files exist
        # END for each packpath, indexpath pair

        # verify the packs thoroughly
        rewind_streams()
        entity = PackEntity.create(pack_objs, rw_dir)
        count = 0
        for info in entity.info_iter():
            count += 1
            for use_crc in range(2):
                assert entity.is_valid_stream(info.binsha, use_crc)
            # END for each crc mode
        # END for each info
        assert count == len(pack_objs)
        entity.close()
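
For reference, the pack-plus-index round trip from the last two examples boils down to the following condensed sketch. The paths are illustrative only; the write_pack signature and return values are taken from the tests above.

import tempfile

entity = PackEntity("/path/to/pack-deadbeef.pack")      # hypothetical source pack
objs = list(entity.stream_iter())

pack_path = tempfile.mktemp('', 'pack', '/tmp')          # illustrative temp paths
index_path = tempfile.mktemp('', 'index', '/tmp')
with open(pack_path, 'wb') as pfile, open(index_path, 'wb') as ifile:
    # write all object streams into one pack and its index in a single pass
    pack_sha, index_sha = PackEntity.write_pack(objs, pfile.write, ifile.write,
                                                object_count=len(objs))

# the returned checksums match what PackFile / PackIndexFile read back from disk
assert PackFile(pack_path).checksum() == pack_sha
idx = PackIndexFile(index_path)
assert idx.packfile_checksum() == pack_sha
assert idx.indexfile_checksum() == index_sha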