Example #1
    def test_pack_writing(self):
        # see how fast we can write a pack from object streams.
        # This will not be fast, as we also spend time decompressing the streams
        ostream = CountedNullStream()
        pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))

        ni = 5000
        count = 0
        st = time()
        for sha in pdb.sha_iter():
            count += 1
            pdb.stream(sha)
            if count == ni:
                break
        # END gather objects for pack-writing
        elapsed = time() - st
        print("PDB Streaming: Got %i streams by sha in %f s ( %f streams/s )" %
              (ni, elapsed, ni / elapsed), file=sys.stderr)

        st = time()
        PackEntity.write_pack((pdb.stream(sha) for sha in pdb.sha_iter()),
                              ostream.write,
                              object_count=ni)
        elapsed = time() - st
        total_kb = ostream.bytes_written() / 1000
        print("PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" %
              (total_kb, elapsed, total_kb / elapsed), file=sys.stderr)
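
CountedNullStream is not defined in these snippets. From its use above it only needs a write() method, passed as the pack writer's output callback, and a bytes_written() accessor. A minimal sketch of such a counting sink, with the interface assumed from that usage:

    class CountedNullStream(object):
        """Write sink that discards its input but counts the bytes received."""

        def __init__(self):
            self._bw = 0

        def write(self, data):
            # discard the data, only track how much was written
            self._bw += len(data)
            return len(data)

        def bytes_written(self):
            return self._bw
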
Example #2
    def test_pack_writing(self):
        # see how fast we can write a pack from object streams.
        # This will not be fast, as we also spend time decompressing the streams
        ostream = CountedNullStream()
        pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))

        ni = 1000
        count = 0
        st = time()
        for sha in pdb.sha_iter():
            count += 1
            pdb.stream(sha)
            if count == ni:
                break
        # END gather objects for pack-writing
        elapsed = time() - st
        print("PDB Streaming: Got %i streams by sha in in %f s ( %f streams/s )" %
              (ni, elapsed, ni / (elapsed or 1)), file=sys.stderr)

        st = time()
        PackEntity.write_pack((pdb.stream(sha) for sha in pdb.sha_iter()), ostream.write, object_count=ni)
        elapsed = time() - st
        total_kb = ostream.bytes_written() / 1000
        print("PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" %
              (total_kb, elapsed, total_kb / (elapsed or 1)), file=sys.stderr)
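
PackEntity.write_pack takes an iterable of object streams, a write callable, and the object count, so the same benchmark body can produce a real pack file instead of a null sink. A hedged sketch; the gitdb import paths are assumptions, and out_path is a placeholder:

    import os
    from gitdb.db.pack import PackedDB   # assumed import path
    from gitdb.pack import PackEntity    # assumed import path

    def write_pack_to_file(repo_path, out_path):
        # Stream every packed object into a new pack file on disk.
        pdb = PackedDB(os.path.join(repo_path, "objects/pack"))
        shas = list(pdb.sha_iter())
        with open(out_path, "wb") as fp:
            PackEntity.write_pack((pdb.stream(sha) for sha in shas),
                                  fp.write,
                                  object_count=len(shas))
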
Example #3
    def test_pack_random_access(self):
        pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))

        # sha lookup
        st = time()
        sha_list = list(pdb.sha_iter())
        elapsed = time() - st
        ns = len(sha_list)
        print("PDB: looked up %i shas by index in %f s ( %f shas/s )" %
              (ns, elapsed, ns / elapsed),
              file=sys.stderr)

        # sha lookup: pack info access
        pdb_pack_info = pdb._pack_info
        st = time()
        for sha in sha_list:
            pdb_pack_info(sha)
        # END for each sha to look up
        elapsed = time() - st

        # discard cache
        del pdb._entities
        pdb.entities()
        print("PDB: looked up %i shas in %i packs in %f s ( %f shas/s )" %
              (ns, len(pdb.entities()), elapsed, ns / elapsed),
              file=sys.stderr)

        # query info and streams only
        max_items = 10000  # can wait longer when testing memory
        for pdb_fun in (pdb.info, pdb.stream):
            st = time()
            for sha in sha_list[:max_items]:
                pdb_fun(sha)
            elapsed = time() - st
            print("PDB: Obtained %i object %s by sha in %f s ( %f items/s )" %
                  (max_items, pdb_fun.__name__.upper(), elapsed,
                   max_items / elapsed),
                  file=sys.stderr)
        # END for each function

        # retrieve stream and read all
        max_items = 5000
        pdb_stream = pdb.stream
        total_size = 0
        st = time()
        for sha in sha_list[:max_items]:
            stream = pdb_stream(sha)
            read_len = len(stream.read())
            assert read_len == stream.size
            total_size += stream.size
        elapsed = time() - st
        total_kib = total_size / 1024
        print(
            "PDB: Obtained %i streams by sha and read all bytes totalling %i KiB ( %f KiB / s ) in %f s ( %f streams/s )"
            % (max_items, total_kib, total_kib / elapsed, elapsed,
               max_items / elapsed),
            file=sys.stderr)
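
The measure-and-report pattern above repeats for every loop: record time(), run the calls, then print a throughput line. A small helper (hypothetical, not part of gitdb) that captures the pattern:

    import sys
    from time import time

    def timed_throughput(label, items, fn):
        # Apply fn to each item and report items/s to stderr, as above.
        st = time()
        for item in items:
            fn(item)
        elapsed = time() - st
        print("%s: %i items in %f s ( %f items/s )"
              % (label, len(items), elapsed, len(items) / (elapsed or 1)),
              file=sys.stderr)
        return elapsed

With it, the info/stream loop shrinks to timed_throughput("PDB info", sha_list[:max_items], pdb.info).
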
Example #4
	def test_pack_random_access(self):
		pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
		
		# sha lookup
		st = time()
		sha_list = list(pdb.sha_iter())
		elapsed = time() - st
		ns = len(sha_list)
		print >> sys.stderr, "PDB: looked up %i shas by index in %f s ( %f shas/s )" % (ns, elapsed, ns / elapsed)
		
		# sha lookup: best-case and worst-case access
		pdb_pack_info = pdb._pack_info
		access_times = list()
		for rand in range(2):
			if rand:
				random.shuffle(sha_list)
			# END shuffle shas
			st = time()
			for sha in sha_list:
				pdb_pack_info(sha)
			# END for each sha to look up
			elapsed = time() - st
			access_times.append(elapsed)
			
			# discard cache
			del pdb._entities
			pdb.entities()
			print("PDB: looked up %i shas in %i packs (random=%i) in %f s ( %f shas/s )" % (ns, len(pdb.entities()), rand, elapsed, ns / elapsed), file=sys.stderr)
		# END for each random mode
		elapsed_order, elapsed_rand = access_times
		
		# well, it's never really sequential regarding the memory patterns, but it
		# shows how well the priority cache performs
		print("PDB: sequential access is %f %% faster than random-access" % (100 - ((elapsed_order / elapsed_rand) * 100)), file=sys.stderr)
		
		
		# query info and streams only
		max_items = 10000			# can wait longer when testing memory
		for pdb_fun in (pdb.info, pdb.stream):
			st = time()
			for sha in sha_list[:max_items]:
				pdb_fun(sha)
			elapsed = time() - st
			print >> sys.stderr, "PDB: Obtained %i object %s by sha in %f s ( %f items/s )" % (max_items, pdb_fun.__name__.upper(), elapsed, max_items / elapsed)
		# END for each function
		
		# retrieve stream and read all
		max_items = 5000
		pdb_stream = pdb.stream
		total_size = 0
		st = time()
		for sha in sha_list[:max_items]:
			stream = pdb_stream(sha)
			stream.read()
			total_size += stream.size
		elapsed = time() - st
		total_kib = total_size / 1024
		print("PDB: Obtained %i streams by sha and read all bytes totalling %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (max_items, total_kib, total_kib / elapsed, elapsed, max_items / elapsed), file=sys.stderr)
Example #7
	def test_stream_reading(self):
		pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
		
		# streaming only, meant for --with-profile runs
		ni = 5000
		count = 0
		pdb_stream = pdb.stream
		total_size = 0
		st = time()
		for sha in pdb.sha_iter():
			if count == ni:
				break
			stream = pdb_stream(sha)
			stream.read()
			total_size += stream.size
			count += 1
		elapsed = time() - st
		total_kib = total_size / 1024
		print("PDB Streaming: Got %i streams by sha and read all bytes totalling %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (ni, total_kib, total_kib / elapsed, elapsed, ni / elapsed), file=sys.stderr)