def test_large_data_streaming(self, path):
    ldb = LooseObjectDB(path)
    string_ios = list()  # list of streams we previously created

    # serial mode
    for randomize in range(2):
        desc = (randomize and 'random ') or ''
        print("Creating %s data ..." % desc, file=sys.stderr)
        st = time()
        size, stream = make_memory_file(self.large_data_size_bytes, randomize)
        elapsed = time() - st
        print("Done (in %f s)" % elapsed, file=sys.stderr)
        string_ios.append(stream)

        # writing - due to the compression it will seem faster than it is
        st = time()
        sha = ldb.store(IStream('blob', size, stream)).binsha
        elapsed_add = time() - st
        assert ldb.has_object(sha)
        db_file = ldb.readable_db_object_path(bin_to_hex(sha))
        fsize_kib = os.path.getsize(db_file) / 1024
        size_kib = size / 1024
        print("Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)"
              % (size_kib, fsize_kib, desc, elapsed_add, size_kib / (elapsed_add or 1)),
              file=sys.stderr)

        # reading all at once
        st = time()
        ostream = ldb.stream(sha)
        shadata = ostream.read()
        elapsed_readall = time() - st

        stream.seek(0)
        assert shadata == stream.getvalue()

        print("Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)"
              % (size_kib, desc, elapsed_readall, size_kib / (elapsed_readall or 1)),
              file=sys.stderr)

        # reading in chunks of 512 KiB
        cs = 512 * 1024
        chunks = list()
        st = time()
        ostream = ldb.stream(sha)
        while True:
            data = ostream.read(cs)
            chunks.append(data)
            if len(data) < cs:
                break
        # END read in chunks
        elapsed_readchunks = time() - st

        stream.seek(0)
        assert b''.join(chunks) == stream.getvalue()

        cs_kib = cs / 1024
        print("Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)"
              % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / (elapsed_readchunks or 1)),
              file=sys.stderr)

        # delete the db file so the next iteration has work to do
        ostream = None  # release the file handle (win)
        remove(db_file)
    # END for each randomization mode
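# `make_memory_file` above comes from the test helper library and is not shown
# in this section. The sketch below is an assumption reconstructed purely from
# its call site: it must return a (size, stream) pair where `stream` behaves
# like io.BytesIO (seek()able, supports getvalue()), optionally random-filled.
import io
import os

def make_memory_file(size_in_bytes, randomize=False):
    """Hypothetical sketch of the test helper, not the real implementation."""
    if randomize:
        data = os.urandom(size_in_bytes)  # random data compresses poorly
    else:
        data = b'\0' * size_in_bytes      # constant data compresses very well
    return len(data), io.BytesIO(data)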
def store(self, istream):
    """Note: the sha we produce will be hex by nature."""
    tmp_path = None
    writer = self.ostream()
    if writer is None:
        # open a tmp file to write the data to
        fd, tmp_path = tempfile.mkstemp(prefix="obj", dir=self._root_path)

        if istream.binsha is None:
            writer = FDCompressedSha1Writer(fd)
        else:
            writer = FDStream(fd)
        # END handle direct stream copies
    # END handle custom writer

    try:
        try:
            if istream.binsha is not None:
                # copy as much as possible, the actual uncompressed item size might
                # be smaller than the compressed version
                stream_copy(istream.read, writer.write, MAXSIZE, self.stream_chunk_size)
            else:
                # write object with header, we have to make a new one
                write_object(istream.type, istream.size, istream.read, writer.write,
                             chunk_size=self.stream_chunk_size)
            # END handle direct stream copies
        finally:
            if tmp_path:
                writer.close()
        # END assure target stream is closed
    except:
        if tmp_path:
            os.remove(tmp_path)
        raise
    # END assure tmpfile removal on error

    hexsha = None
    if istream.binsha:
        hexsha = istream.hexsha
    else:
        hexsha = writer.sha(as_hex=True)
    # END handle sha

    if tmp_path:
        obj_path = self.db_path(self.object_path(hexsha))
        obj_dir = dirname(obj_path)
        if not isdir(obj_dir):
            mkdir(obj_dir)
        # END handle destination directory
        # renaming onto an existing file doesn't work on Windows
        if os.name == "nt":
            if isfile(obj_path):
                remove(tmp_path)
            else:
                rename(tmp_path, obj_path)
            # end rename only if needed
        else:
            rename(tmp_path, obj_path)
        # END handle win32

        # make sure it's readable for all! It started out as an rw------- tmp
        # file but needs to be rw-r--r--
        chmod(obj_path, self.new_objects_mode)
    # END handle dry_run

    istream.binsha = hex_to_bin(hexsha)
    return istream
def store(self, istream):
    """Note: the sha we produce will be hex by nature."""
    tmp_path = None
    writer = self.ostream()
    if writer is None:
        # open a tmp file to write the data to
        fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)

        if istream.binsha is None:
            writer = FDCompressedSha1Writer(fd)
        else:
            writer = FDStream(fd)
        # END handle direct stream copies
    # END handle custom writer

    try:
        try:
            if istream.binsha is not None:
                # copy as much as possible, the actual uncompressed item size might
                # be smaller than the compressed version
                stream_copy(istream.read, writer.write, sys.maxsize, self.stream_chunk_size)
            else:
                # write object with header, we have to make a new one
                write_object(istream.type, istream.size, istream.read, writer.write,
                             chunk_size=self.stream_chunk_size)
            # END handle direct stream copies
        finally:
            if tmp_path:
                writer.close()
        # END assure target stream is closed
    except:
        if tmp_path:
            os.remove(tmp_path)
        raise
    # END assure tmpfile removal on error

    hexsha = None
    if istream.binsha:
        hexsha = istream.hexsha
    else:
        hexsha = writer.sha(as_hex=True)
    # END handle sha

    if tmp_path:
        obj_path = self.db_path(self.object_path(hexsha))
        obj_dir = dirname(obj_path)
        if not isdir(obj_dir):
            mkdir(obj_dir)
        # END handle destination directory
        # renaming onto an existing file doesn't work on Windows
        if os.name == 'nt' and isfile(obj_path):
            remove(obj_path)
        # END handle win32
        rename(tmp_path, obj_path)

        # make sure it's readable for all! It started out as an rw------- tmp
        # file but needs to be rw-r--r--
        chmod(obj_path, self.new_objects_mode)
    # END handle dry_run

    istream.binsha = hex_to_bin(hexsha)
    return istream
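# A minimal usage sketch for store(), mirroring the call pattern in the test
# above. Everything here follows from this section (LooseObjectDB, IStream,
# store(), has_object()); the db path and payload are placeholders.
import io

db = LooseObjectDB("/tmp/loose-odb")  # placeholder path, assumed writable
payload = b"hello gitdb"
istream = IStream('blob', len(payload), io.BytesIO(payload))

out = db.store(istream)  # store() fills in out.binsha on return
assert out.binsha is not None
assert db.has_object(out.binsha)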