Example #1
 def test_correctness(self):
     pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
     # disabled for now, as it used to work perfectly; checking big repositories takes a long time
     print("Endurance run: verify streaming of objects (crc and sha)",
           file=sys.stderr)
     for crc in range(2):
         count = 0
         st = time()
         for entity in pdb.entities():
             pack_verify = entity.is_valid_stream
             sha_by_index = entity.index().sha
             for index in range(entity.index().size()):
                 try:
                     assert pack_verify(sha_by_index(index), use_crc=crc)
                     count += 1
                 except UnsupportedOperation:
                     pass
                 # END ignore old indices
             # END for each index
         # END for each entity
         elapsed = time() - st
         print(
             "PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" %
             (count, crc, elapsed, count / elapsed),
             file=sys.stderr)
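
For orientation, here is a minimal, self-contained setup sketch for the snippets in this section. The import paths (gitdb.db.pack, gitdb.pack, gitdb.exc) and the repository path are assumptions about the gitdb layout rather than part of the example; only PackedDB, PackEntity and the pdb method names are taken from the code above.

    # Hedged setup sketch: the tests above run inside a test class that provides
    # self.gitrepopath; a standalone equivalent might look like this.
    import os
    import sys
    from time import time                       # used by the timing loops in the examples

    from gitdb.db.pack import PackedDB          # assumed import location
    from gitdb.pack import PackEntity           # assumed import location
    from gitdb.exc import UnsupportedOperation  # assumed import location

    repo_path = "/path/to/repository/.git"      # hypothetical repository path
    pdb = PackedDB(os.path.join(repo_path, "objects/pack"))

    # Count all packed objects across every pack found in the directory.
    print("packed objects: %i" % sum(1 for _ in pdb.sha_iter()), file=sys.stderr)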
Example #2
    def test_pack_writing(self):
        # see how fast we can write a pack from object streams.
        # This will not be fast, as we take time for decompressing the streams as well
        ostream = CountedNullStream()
        pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))

        ni = 5000
        count = 0
        total_size = 0
        st = time()
        for sha in pdb.sha_iter():
            count += 1
            pdb.stream(sha)
            if count == ni:
                break
        #END gather objects for pack-writing
        elapsed = time() - st
        print >> sys.stderr, "PDB Streaming: Got %i streams by sha in in %f s ( %f streams/s )" % (
            ni, elapsed, ni / elapsed)

        st = time()
        PackEntity.write_pack((pdb.stream(sha) for sha in pdb.sha_iter()),
                              ostream.write,
                              object_count=ni)
        elapsed = time() - st
        total_kb = ostream.bytes_written() / 1000
        print >> sys.stderr, "PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % (
            total_kb, elapsed, total_kb / elapsed)
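
As a variation on Example #2, a small sketch that writes the pack to real files instead of the counting null sink. The pack_write callable and the object_count keyword mirror the call above; the index_write keyword is an assumption about PackEntity.write_pack and may not exist in every gitdb version.

    # Hedged sketch: write pack and index bytes to files on disk.
    # index_write is an assumed keyword; drop it if your write_pack lacks it.
    with open("/tmp/example.pack", "wb") as pack_fp, \
         open("/tmp/example.idx", "wb") as index_fp:
        PackEntity.write_pack((pdb.stream(sha) for sha in pdb.sha_iter()),
                              pack_fp.write,
                              index_write=index_fp.write,
                              object_count=ni)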
Example #3
    def test_pack_random_access(self):
        pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))

        # sha lookup
        st = time()
        sha_list = list(pdb.sha_iter())
        elapsed = time() - st
        ns = len(sha_list)
        print("PDB: looked up %i shas by index in %f s ( %f shas/s )" %
              (ns, elapsed, ns / elapsed),
              file=sys.stderr)

        # sha lookup via the internal pack-info method (in-order access only)
        pdb_pack_info = pdb._pack_info
        st = time()
        for sha in sha_list:
            pdb_pack_info(sha)
        # END for each sha to look up
        elapsed = time() - st

        # discard cache
        del pdb._entities
        pdb.entities()
        print("PDB: looked up %i sha in %i packs in %f s ( %f shas/s )" %
              (ns, len(pdb.entities()), elapsed, ns / elapsed),
              file=sys.stderr)
        # END for each random mode

        # query info and streams only
        max_items = 10000  # can wait longer when testing memory
        for pdb_fun in (pdb.info, pdb.stream):
            st = time()
            for sha in sha_list[:max_items]:
                pdb_fun(sha)
            elapsed = time() - st
            print("PDB: Obtained %i object %s by sha in %f s ( %f items/s )" %
                  (max_items, pdb_fun.__name__.upper(), elapsed,
                   max_items / elapsed),
                  file=sys.stderr)
        # END for each function

        # retrieve stream and read all
        max_items = 5000
        pdb_stream = pdb.stream
        total_size = 0
        st = time()
        for sha in sha_list[:max_items]:
            stream = pdb_stream(sha)
            read_len = len(stream.read())
            assert read_len == stream.size
            total_size += stream.size
        elapsed = time() - st
        total_kib = total_size / 1000
        print(
            "PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )"
            % (max_items, total_kib, total_kib / elapsed, elapsed,
               max_items / elapsed),
            file=sys.stderr)
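
To make the per-object calls in Example #3 concrete, here is a small sketch that inspects a single object. stream.read() and stream.size appear in the test above; the type and size attributes on the object returned by pdb.info() are assumptions about gitdb's info type.

    # Hedged sketch: look up one object by its binary sha.
    sha = next(pdb.sha_iter())

    info = pdb.info(sha)        # metadata only; attribute names are assumed
    print("type=%s, size=%i" % (info.type, info.size), file=sys.stderr)

    stream = pdb.stream(sha)    # full object stream, as used in the test above
    data = stream.read()
    assert len(data) == stream.size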
Example #4
	def test_stream_reading(self):
		pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
		
		# streaming only, meant for --with-profile runs
		ni = 5000
		count = 0
		pdb_stream = pdb.stream
		total_size = 0
		st = time()
		for sha in pdb.sha_iter():
			if count == ni:
				break
			stream = pdb_stream(sha)
			stream.read()
			total_size += stream.size
			count += 1
		elapsed = time() - st
		total_kib = total_size / 1000
		print >> sys.stderr, "PDB Streaming: Got %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (ni, total_kib, total_kib/elapsed , elapsed, ni / elapsed)
Example #5
    def test_pack_random_access(self):
        pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))

        # sha lookup
        st = time()
        sha_list = list(pdb.sha_iter())
        elapsed = time() - st
        ns = len(sha_list)
        print >> sys.stderr, "PDB: looked up %i shas by index in %f s ( %f shas/s )" % (
            ns, elapsed, ns / elapsed)

        # sha lookup: best-case and worst-case access
        pdb_pack_info = pdb._pack_info
        access_times = list()
        for rand in range(2):
            if rand:
                random.shuffle(sha_list)
            # END shuffle shas
            st = time()
            for sha in sha_list:
                pdb_pack_info(sha)
            # END for each sha to look up
            elapsed = time() - st
            access_times.append(elapsed)

            # discard cache
            del pdb._entities
            pdb.entities()
            print >> sys.stderr, "PDB: looked up %i sha in %i packs (random=%i) in %f s ( %f shas/s )" % (
                ns, len(pdb.entities()), rand, elapsed, ns / elapsed)
        # END for each random mode
        elapsed_order, elapsed_rand = access_times

        # well, it's never really sequential regarding the memory patterns, but it
        # shows how well the priority cache performs
        print("PDB: sequential access is %f %% faster than random-access" %
              (100 - ((elapsed_order / elapsed_rand) * 100)),
              file=sys.stderr)

        # query info and streams only
        max_items = 10000  # can wait longer when testing memory
        for pdb_fun in (pdb.info, pdb.stream):
            st = time()
            for sha in sha_list[:max_items]:
                pdb_fun(sha)
            elapsed = time() - st
            print >> sys.stderr, "PDB: Obtained %i object %s by sha in %f s ( %f items/s )" % (
                max_items, pdb_fun.__name__.upper(), elapsed,
                max_items / elapsed)
        # END for each function

        # retrieve stream and read all
        max_items = 5000
        pdb_stream = pdb.stream
        total_size = 0
        st = time()
        for sha in sha_list[:max_items]:
            stream = pdb_stream(sha)
            stream.read()
            total_size += stream.size
        elapsed = time() - st
        total_kib = total_size / 1000
        print >> sys.stderr, "PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (
            max_items, total_kib, total_kib / elapsed, elapsed,
            max_items / elapsed)