def __init__(self, name_map=None, history_sha=NULL_SHA):
    """Create a manifest mapping file names to history chains.

    name_map: dict of name -> (file sha1, patch chain head sha1),
              or None for an empty manifest.
    history_sha: sha1 of the head link of the manifest's own history
                 chain; NULL_SHA for a manifest not yet stored.
    """
    check_shas([history_sha, ])
    # Fix: compare to None with 'is', not '==' (PEP 8; '==' can invoke
    # an arbitrary __eq__).  A fresh dict also avoids the shared
    # mutable-default pitfall.
    if name_map is None:
        name_map = {}
    # name -> (file sha1, patch chain head sha1)
    self.name_map = name_map
    # Hmmmm... convenient, but it ties the manifest to an archive.
    self.stored_sha = history_sha
def get_file(self, history_sha, out_file):
    """ Get a file by the sha1 hash of the head link in its
        history link chain.

        NULL_SHA means "no history": an empty file is written
        to out_file.
    """
    check_shas([history_sha, ])
    # hmmmm...
    if history_sha == NULL_SHA:
        # Fix: use a context manager instead of a bare open()/close()
        # pair so the handle is always released.  Opening 'wb' and
        # writing nothing truncates/creates an empty file.
        with open(out_file, 'wb'):
            pass
        return
    # Replay the full delta chain for this history head into out_file.
    self.delta_coder.apply_deltas(self.blocks.get_history(history_sha),
                                  out_file)
def from_archive(cls, archive, history_sha):
    """ Create a FileManifest from a file in the archive. """
    check_shas([history_sha, ])
    snapshot = archive.blocks.tmps.make_temp_file()
    try:
        # Extract the stored manifest file, then parse it into a map.
        archive.get_file(history_sha, snapshot)
        # Hmmmm... age... put back in manifest?
        return FileManifest(manifest_from_file(snapshot), history_sha)
    finally:
        # Temp file is always reclaimed, even on a parse failure.
        archive.blocks.tmps.remove_temp_file(snapshot)
def __init__(self, name_map=None, history_sha=NULL_SHA):
    """Create a manifest mapping file names to history chains.

    name_map: dict of name -> (file sha1, patch chain head sha1),
              or None for an empty manifest.
    history_sha: sha1 of the head link of the manifest's own history
                 chain; NULL_SHA for a manifest not yet stored.
    """
    check_shas([history_sha, ])
    # Fix: 'is None' instead of '== None' (PEP 8; '==' can invoke an
    # arbitrary __eq__).  A fresh dict also avoids the shared
    # mutable-default pitfall.
    if name_map is None:
        name_map = {}
    # name -> (file sha1, patch chain head sha1)
    self.name_map = name_map
    # Hmmmm... convenient, but it ties the manifest to an archive.
    self.stored_sha = history_sha
def write_manifest(cls, archive, name_map, history_sha):
    """ Helper, writes updated manifest to archive. Returns link. """
    check_shas([history_sha, ])
    # Serialize the name map into a scratch file, then store that file
    # as a new delta link chained onto history_sha.
    scratch = archive.blocks.tmps.make_temp_file()
    try:
        manifest_to_file(scratch, name_map)
        return archive.write_new_delta(history_sha, scratch)
    finally:
        archive.blocks.tmps.remove_temp_file(scratch)
def from_archive(cls, archive, history_sha):
    """ Create a FileManifest from a file in the archive. """
    check_shas([history_sha, ])
    manifest_copy = archive.blocks.tmps.make_temp_file()
    try:
        # Pull the manifest bytes out of the archive, parse, wrap.
        archive.get_file(history_sha, manifest_copy)
        # Hmmmm... age... put back in manifest?
        parsed_map = manifest_from_file(manifest_copy)
        return FileManifest(parsed_map, history_sha)
    finally:
        # Clean up the scratch copy no matter what happened above.
        archive.blocks.tmps.remove_temp_file(manifest_copy)
def write_manifest(cls, archive, name_map, history_sha):
    """ Helper, writes updated manifest to archive. Returns link. """
    check_shas([history_sha, ])
    # Add manifest: dump the map to a temp file and append it to the
    # archive as a delta on top of history_sha.
    dump_path = archive.blocks.tmps.make_temp_file()
    try:
        manifest_to_file(dump_path, name_map)
        return archive.write_new_delta(history_sha, dump_path)
    finally:
        archive.blocks.tmps.remove_temp_file(dump_path)
def referenced_shas(self, head_sha_list, include_updates=True):
    """ Return the SHA1 hashes of all history links referenced by
        the links in the head_sha_list. """
    check_shas(head_sha_list)
    # Collect every chain member's sha in a single comprehension.
    shas = set(link[0]
               for head_sha in head_sha_list
               for link in self.blocks.get_history(head_sha))
    if include_updates:
        shas.update(self.uncommited_shas())
    # Hmmm... frozenset faster?
    return shas
def update(self, archive, entry_infos, other_head_shas=None,
           truncate_manifest_history=False):
    """ Update the manifest with the changes in entry infos
        and write the changes and the updated manifest into
        the archive. """
    if other_head_shas is None:
        other_head_shas = set([])
    check_shas(other_head_shas)
    archive.start_update()
    try:
        base_sha = (NULL_SHA if truncate_manifest_history
                    else self.stored_sha)
        new_names, root_sha = self.write_changes(archive, entry_infos,
                                                 base_sha)
        # History for all files except recently modified ones:
        # commit_update() handles the freshly written links itself, so
        # here we only gather chains of files that survived unchanged.
        pending = archive.uncommited_shas()
        kept_shas = set([])
        for value in new_names.values():
            if value[1] in pending:
                continue
            for link in archive.blocks.get_history(value[1]):
                kept_shas.add(link[0])
        all_shas = archive.referenced_shas(
            kept_shas.union(other_head_shas))
        archive.commit_update(all_shas)
        self.stored_sha = root_sha
        self.name_map = new_names
    except BaseException:
        # Same semantics as the original 'raised' flag: any failure
        # (including KeyboardInterrupt/SystemExit) rolls back the
        # pending update before propagating.
        archive.abandon_update()
        raise
def update(self, archive, entry_infos, other_head_shas=None,
           truncate_manifest_history=False):
    """ Update the manifest with the changes in entry infos
        and write the changes and the updated manifest into
        the archive. """
    if other_head_shas is None:
        other_head_shas = set([])
    check_shas(other_head_shas)
    archive.start_update()
    try:
        if truncate_manifest_history:
            base_sha = NULL_SHA
        else:
            base_sha = self.stored_sha
        new_names, root_sha = self.write_changes(archive, entry_infos,
                                                 base_sha)
        # Gather history only for files that did NOT change in this
        # update; commit_update() records the new links on its own.
        fresh = archive.uncommited_shas()
        unchanged_shas = set([])
        for value in new_names.values():
            if value[1] not in fresh:
                for link in archive.blocks.get_history(value[1]):
                    unchanged_shas.add(link[0])
        archive.commit_update(archive.referenced_shas(
            unchanged_shas.union(other_head_shas)))
        self.stored_sha = root_sha
        self.name_map = new_names
    except BaseException:
        # Equivalent to the original 'raised' flag in the finally
        # block: roll back on any failure, then re-raise.
        archive.abandon_update()
        raise
def compress(self, referenced_shas):
    """ Compresses the archive to fit in max_blocks blocks.

        REQUIRES: self.blocks.total_blocks() > max_blocks

        Merges blocks such that:
        n <= max_blocks and block[0] < block[1] ... < block[n -1]
    """
    if referenced_shas is None:
        referenced_shas = set([])
    check_shas(referenced_shas)
    #count = self.blocks.nonzero_blocks()
    block_count = len(self.blocks.tags)
    # Compute the "real" size of each block without unreferenced links.
    # link[5] looks like the block index and link[6] the link length —
    # inferred from usage here; verify against the link layout.
    real_lens = [0] * block_count
    for links in self.blocks.link_map.values():
        for link in links:
            if link[0] in referenced_shas:
                real_lens[link[5]] += link[6]
    before = [[idx, idx, real_lens[idx]] for idx in range(block_count)]
    after = compress(before, self.max_blocks)
    # Can't put lists in a set — normalize both plans to tuples so
    # they can be compared (and hashed downstream).
    after = [tuple(entry) for entry in after]
    before = [tuple(entry) for entry in before]
    if after == before:
        return False
    self.blocks.update_blocks(before, after, referenced_shas,
                              self.max_blocks)
    return True
def write_new_delta(self, history_sha, new_file):
    """ Writes a new history link to the update file.

        history_sha can be NULL_SHA.  Can ignore history.
        i.e. not link to previous history.

        Returns the new link.

        REQUIRES: is updating.
        REQUIRES: history_sha is present in the currently committed
                  version of the archive. You CANNOT reference
                  uncommited history links.
    """
    check_shas([history_sha, ])
    self.require_blocks()
    if self.blocks.update_file is None:
        raise Exception("Not updating.")
    history = self.blocks.get_history(history_sha)
    # Three scratch files: the candidate delta/full-insert blob, the
    # reconstructed previous version, and a coalesced "oldest" delta.
    tmp_file = self.blocks.tmps.make_temp_file()
    old_file = self.blocks.tmps.make_temp_file()
    oldest_delta = self.blocks.tmps.make_temp_file()
    blob_file = None
    try:
        # REDFLAG: Think through.
        # It would make more sense for the delta coder to decide when to
        # truncate history, but I don't want to expose the full archive
        # interface to the delta coder implementation.
        if len(history) >= MAX_HISTORY_LEN:
            # Chain is long: build two candidates and keep the cheaper.
            # Candidate A: delta against the original (oldest) file.
            self.get_file(history[-1][0], old_file)
            parent0 = self.delta_coder.make_delta(history[-1:],
                                                  old_file,
                                                  new_file,
                                                  oldest_delta)
            # Candidate B: full reinsert of the new file.
            parent1 = self.delta_coder.make_full_insert(new_file,
                                                        tmp_file)
            #print "full: %i old: %i delta: %i target: %i" % (
            #    os.path.getsize(tmp_file),
            #    history[-1][6],
            #    os.path.getsize(oldest_delta),
            #    COALESCE_FACTOR * os.path.getsize(tmp_file))
            # LATER: Back to this.
            # This is bottom up history shortening driven by the most
            # recent changes. We should also have some mechanism
            # shortening history (to 1 link) for files which haven't
            # changed in many updates, whenever blocks are merged.
            # Hmmmm... hard (impossible?) to decouple from manifest
            # because files are addressed by head history link sha.
            #
            # Pick the full reinsert when its (scaled) size undercuts
            # the coalesced delta plus the old tail link's length.
            # history[-1][6] is presumably the tail link's stored
            # length — verify against the link layout.
            if (COALESCE_FACTOR * os.path.getsize(tmp_file) <
                    (os.path.getsize(oldest_delta) + history[-1][6])):
                parent = parent1
                blob_file = tmp_file
                #print "SHORTENED: FULL REINSERT"
            else:
                #print "history:"
                #for link in history:
                #    print " ", str_sha(link[0]), str_sha(link[2])
                parent = parent0
                #print
                #print "parent: ", str_sha(parent)
                blob_file = oldest_delta
                #print "SHORTENED: COMPRESSED DELTAS"
        else:
            # Normal case: one more delta on top of the existing chain.
            self.get_file(history_sha, old_file)
            parent = self.delta_coder.make_delta(history, old_file,
                                                 new_file, tmp_file)
            blob_file = tmp_file
        # Append the chosen blob as a raw link and remember it as the
        # newest uncommitted link.
        self.blocks.update_links.append(
            write_raw_link(self.blocks.update_stream, self.age + 1,
                           parent, blob_file, 0))
        return self.blocks.update_links[-1]
    finally:
        # All scratch files are reclaimed regardless of outcome.
        self.blocks.tmps.remove_temp_file(old_file)
        self.blocks.tmps.remove_temp_file(oldest_delta)
        self.blocks.tmps.remove_temp_file(tmp_file)
def write_changes(self, archive, entry_infos,
                  prev_manifest_sha=NULL_SHA):
    """ INTERNAL: Helper function for update().

        Writes the changes required to add the IManifestEntries
        in entries_infos to an archive.

        Raises UpToDateException if there are no changes.

        Return an (updated_name_map, manifest_sha) tuple.
    """
    check_shas([prev_manifest_sha, ])
    sha_to_value = self.make_file_sha_map()
    updated_map = {}
    changed = False
    for entry in entry_infos:
        path = entry.make_file()
        try:
            name = entry.get_name()
            if not is_printable_ascii(name):
                raise IOError("Non-ASCII name: %s" % repr(name))
            known = self.name_map.get(name, None)
            content_sha = get_file_sha(path)
            if known is not None and known[0] == content_sha:
                # Exists in manifest and is unmodified.
                updated_map[name] = known
            elif known is not None:
                # Modified: chain a new delta onto the old history.
                changed = True
                new_link = archive.write_new_delta(known[1], path)
                updated_map[name] = (content_sha, new_link[0])
            elif content_sha in sha_to_value:
                # Renamed: same content already known under another name.
                changed = True
                updated_map[name] = sha_to_value[content_sha]
            else:
                # REDFLAG: We lose history for files which are renamed
                # and modified.
                # Created (or renamed and modified)
                changed = True
                new_link = archive.write_new_delta(NULL_SHA, path)
                updated_map[name] = (content_sha, new_link[0])
            # delete == ophaned history, NOP
        finally:
            entry.release()
    if not changed and (frozenset(updated_map.keys()) ==
                        frozenset(self.name_map.keys())):
        raise UpToDateException("The file manifest is up to date.")
    # Add updated manifest
    manifest_link = FileManifest.write_manifest(archive, updated_map,
                                                prev_manifest_sha)
    return (updated_map, manifest_link[0])
def write_changes(self, archive, entry_infos,
                  prev_manifest_sha=NULL_SHA):
    """ INTERNAL: Helper function for update().

        Writes the changes required to add the IManifestEntries
        in entries_infos to an archive.

        Raises UpToDateException if there are no changes.

        Return an (updated_name_map, manifest_sha) tuple.
    """
    check_shas([prev_manifest_sha, ])
    content_index = self.make_file_sha_map()
    next_map = {}
    dirty = False
    for item in entry_infos:
        local_path = item.make_file()
        try:
            entry_name = item.get_name()
            if not is_printable_ascii(entry_name):
                raise IOError("Non-ASCII name: %s" % repr(entry_name))
            previous = self.name_map.get(entry_name, None)
            entry_sha = get_file_sha(local_path)
            if previous is None:
                dirty = True
                if entry_sha in content_index:
                    # Renamed
                    next_map[entry_name] = content_index[entry_sha]
                else:
                    # REDFLAG: We lose history for files which are
                    # renamed and modified.
                    # Created (or renamed and modified)
                    fresh = archive.write_new_delta(NULL_SHA,
                                                    local_path)
                    next_map[entry_name] = (entry_sha, fresh[0])
            elif previous[0] == entry_sha:
                # Exists in manifest and is unmodified.
                next_map[entry_name] = previous
            else:
                # Modified
                dirty = True
                fresh = archive.write_new_delta(previous[1],
                                                local_path)
                next_map[entry_name] = (entry_sha, fresh[0])
            # delete == ophaned history, NOP
        finally:
            item.release()
    if not dirty:
        # Same key set and nothing rewritten -> nothing to store.
        if (frozenset(next_map.keys()) ==
                frozenset(self.name_map.keys())):
            raise UpToDateException("The file manifest is up to date.")
    # Add updated manifest
    head = FileManifest.write_manifest(archive, next_map,
                                       prev_manifest_sha)
    return (next_map, head[0])