class BlockStorage:
    """ A class to store history links in a collection of files. """
    def __init__(self, tmps, name_policy=None):
        self.tmps = tmps
        self.names = name_policy
        self.tags = ['', ]  # Is also a proxy for length.
        self.link_map = None
        # Hmmmm... file and stream belong in storage
        # but links belong in the archive....
        self.update_file = None
        self.update_stream = None
        self.update_links = []

    def is_updating(self):
        """ Return True if updating, False otherwise. """
        # Hmmm...
        return self.update_stream is not None

    def close(self):
        """ Close the files. """
        self.abandon_update()
        if self.link_map is None:
            return
        self.link_map.close()
        self.link_map = None

    def full_path(self, ordinal, read=True):
        """ Return the full path to an underlying block file. """
        if read:
            return self.names.read_path(ordinal)
        return self.names.write_path(ordinal)

    def get_history(self, head_sha1):
        """ Return the history link chain which has a head link with
            hash head_sha1. """
        if head_sha1 == NULL_SHA:
            return []
        ret = []
        head = head_sha1
        while True:
            link = self.link_map.get_link(head)
            ret.append(link[:])  # Copy
            if link[2] == NULL_SHA:
                return ret
            head = link[2]

    def start_update(self):
        """ Create temporary storage required to write an update.

            You MUST call commit_update() or abandon_update() after
            calling this. """
        if self.update_file is not None:
            raise Exception("Commit or abandon the previous update!")
        self.update_file = self.tmps.make_temp_file()
        self.update_links = []
        raised = True
        try:
            self.update_stream = open(self.update_file, "wb")
            raised = False
        finally:
            if raised:
                self.abandon_update()

    # UpToDateException is recoverable, all others fatal. i.e. zorch instance.
    def commit_update(self, referenced_shas=None):
        """ Permanently write changes into the archive.

            This creates a new block which may replace an existing
            one. """
        assert referenced_shas is not None
        if self.update_file is None or len(self.update_links) == 0:
            raise UpToDateException("No changes to commit.")
        age = 0
        # Automagically add history for self and parents.
        for link in self.update_links:
            age = max(age, link[1])
            # New link
            referenced_shas.add(link[0])
            # Previous history
            # TRICKY: You can't call get_history on the new link itself
            #         because it isn't committed yet.
            for child in self.get_history(link[2]):
                referenced_shas.add(child[0])
        try:
            self.update_stream.close()
            self.update_stream = None  # see is_updating()
            self.add_block(referenced_shas)
            return age
        finally:
            # Always clean up, even on success.
            # EXCEPTIONS ARE FATAL!
            self.abandon_update()

    def abandon_update(self):
        """ Free temporary storage associated with an update without
            committing it. """
        self.update_links = []
        if self.update_stream is not None:
            self.update_stream.close()
            self.update_stream = None
        if self.update_file is not None:
            self.tmps.remove_temp_file(self.update_file)
            self.update_file = None

    # Returns 0
    def create(self, name_policy, num_blocks, overwrite=False):
        """ Initialize the instance by creating a new set of empty
            block files. """
        if name_policy.read_only:
            raise ValueError("Names are read only! Use load() instead?")
        self.names = name_policy
        self.tags = ['', ]  # Length == 1
        if not overwrite:
            for ordinal in range(0, num_blocks):
                if os.path.exists(self.full_path(ordinal, False)):
                    raise IOError("Already exists: %s" %
                                  self.full_path(ordinal, False))
        for ordinal in range(0, num_blocks):
            out_file = open(self.full_path(ordinal, False), 'wb')
            out_file.close()
        return self.load(name_policy, num_blocks)

    # hmmmm... want to use hash names for blocks
    # blocks is [[file_name, desc, dirty], ...]
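    # Added illustration (not in the original source): the update
    # lifecycle implied by the methods above, sketched in comments.
    # 'referenced_shas' is whatever set of referenced hashes the
    # caller is tracking; links are (sha1, age, parent_sha1) tuples,
    # as read off the link[0]/link[1]/link[2] accesses in
    # commit_update() and get_history().
    #
    #   storage.start_update()
    #   try:
    #       # Write raw link data to storage.update_stream and append
    #       # (sha1, age, parent_sha1) links to storage.update_links.
    #       age = storage.commit_update(referenced_shas)
    #   except UpToDateException:
    #       pass  # Recoverable: nothing new to commit.
    #
    # commit_update() always cleans up after itself via
    # abandon_update(); call abandon_update() directly only when
    # bailing out before commit_update() is reached.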
    # returns maximum age
    def load(self, name_policy, num_blocks, tags=None):
        """ Initialize the instance by loading from an existing set of
            block files. """
        self.names = name_policy
        if tags is None:
            tags = ['' for dummy in range(0, num_blocks)]
        # DESIGN INTENT: Meant for keeping track of Freenet CHKs.
        assert len(tags) == num_blocks
        self.tags = tags[:]
        self.link_map = LinkMap()
        age, counts = self.link_map.read(
            [self.full_path(ordinal, False)
             for ordinal in range(0, num_blocks)])
        assert not has_internal_zero(counts)
        if max(counts) == 0:
            self.tags = self.tags[:1]  # Length == 1
        return age

    # Includes 0 length blocks.
    def total_blocks(self):
        """ Return the total number of blocks, including 0 length ones. """
        return len(self.tags)

    # Hmmmm... physical length.
    def nonzero_blocks(self):
        """ Return the number of non-zero length blocks. """
        for ordinal in range(0, len(self.tags)):
            if os.path.getsize(self.full_path(ordinal)) == 0:
                # Check for illegal internal zero length blocks.
                for index in range(ordinal + 1, len(self.tags)):
                    if os.path.exists(self.full_path(index)):
                        assert os.path.getsize(self.full_path(index)) == 0
                return ordinal
        return len(self.tags)

    # This may delete self.update_file, but the caller is
    # still responsible for cleaning it up.
    #
    # Breaks length and ordering invariants.
    def add_block(self, referenced_shas, tag=''):
        """ INTERNAL: Add the temporary update file to the permanent
            block files. """
        assert not self.is_updating()
        assert not self.names.read_only
        update_len = os.path.getsize(self.update_file)
        head_len = os.path.getsize(self.full_path(0, False))
        tmp = None
        try:
            # Doesn't change length.
            if update_len + head_len < MIN_BLOCK_LEN:
                # Link map has open file descriptors.
                # Must close or os.remove() below fails on Windows.
                self.link_map.close()
                # We might merge with an empty block here, but it
                # doesn't matter since the length is bounded. Do better?
                # Can just append to the first block.
                # [N + O1] ...
                tmp = self.merge_blocks((self.update_file,
                                         self.full_path(0, False)),
                                        referenced_shas)
                if os.path.exists(self.full_path(0, False)):
                    # REDFLAG: What if this fails?
                    os.remove(self.full_path(0, False))
                os.rename(tmp, self.full_path(0, False))
                self.tags[0] = tag

                # Deletes update file IFF we get here.
                tmp = self.update_file
                self.update_file = None

                # Fix the link map.
                fixups = {}
                # Drop links in head block.
                for index in range(1, self.total_blocks()):
                    fixups[index] = index
                # Potentially SLOW.
                self.link_map.update_blocks(
                    fixups,
                    [self.full_path(index, False)
                     for index in range(0, self.total_blocks())],
                    [0, ])  # Read links from head block.
                return

            # Deletes update file always.
            tmp = self.update_file
            self.update_file = None

            # Close the link map before messing with files.
            self.link_map.close()
            self.prepend_block(tmp)
            self.tags.insert(0, tag)  # Increments implicit length.

            # Fix the link map.
            fixups = {}
            for index in range(0, self.total_blocks() - 1):  # We inserted!
                fixups[index] = index + 1
            # Potentially SLOW.
            self.link_map.update_blocks(
                fixups,
                [self.full_path(index, False)
                 for index in range(0, self.total_blocks())],
                [0, ])  # Read links from head block.
        finally:
            self.tmps.remove_temp_file(tmp)

    # Returns a tmp file with the merged blocks.
    # Caller must delete the tmp file.
    def merge_blocks(self, block_file_list, referenced_shas):
        """ INTERNAL: Merge blocks into a single file. """
        tmp = self.tmps.make_temp_file()
        copied_shas = set([])
        raised = True
        try:
            out_file = open(tmp, 'wb')
            try:
                for name in block_file_list:
                    in_file = open(name, "rb")
                    try:
                        # Hmmm... do something with count?
                        #count = copy_raw_links(in_file, out_file,
                        #                       referenced_shas)
                        copy_raw_links(in_file, out_file,
                                       referenced_shas, copied_shas)
                    finally:
                        in_file.close()
            finally:
                out_file.close()
            raised = False
            return tmp
        finally:
            if raised:
                self.tmps.remove_temp_file(tmp)

    # Implementation helper function; the caller deals with file cleanup.
    # REQUIRES: new_block is not an extant block file.
    def prepend_block(self, new_block):
        """ INTERNAL: Insert a new block at the head of the block list. """
        assert not self.is_updating()
        assert self.update_file is None
        # Shift all extant blocks up by one index.
        for index in range(self.total_blocks() - 1, -1, -1):
            if os.path.exists(self.full_path(index + 1, False)):
                # REDFLAG: failure?
                os.remove(self.full_path(index + 1, False))
            # REDFLAG: failure?
            os.rename(self.full_path(index, False),
                      self.full_path(index + 1, False))
        # Now copy the update block into the 0 position.
        os.rename(new_block, self.full_path(0, False))

    def _make_new_files(self, new_blocks, referenced_shas, tmp_files):
        """ INTERNAL: Implementation helper for update_blocks(). """
        new_files = {}
        for partition in new_blocks:
            # Calling code should have already dropped empty blocks.
            new_files[partition] = self.merge_blocks(
                [self.full_path(index, False)
                 for index in range(partition[0], partition[1] + 1)],
                referenced_shas)
            tmp_files.append(new_files[partition])
        return new_files

    def _remove_old_files(self, dropped_blocks):
        """ INTERNAL: Implementation helper for update_blocks(). """
        # Delete the files for dropped blocks.
        for partition in dropped_blocks:
            assert partition[0] == partition[1]
            if not os.path.exists(self.full_path(partition[0], False)):
                continue
            os.remove(self.full_path(partition[0], False))

    def _copy_old_blocks(self, old_blocks, tmp_files):
        """ INTERNAL: Implementation helper for update_blocks(). """
        renamed = {}
        for partition in old_blocks:
            assert partition[0] == partition[1]
            src = self.full_path(partition[0], False)
            assert os.path.exists(src)
            dest = self.tmps.make_temp_file()
            tmp_files.append(dest)
            os.rename(src, dest)
            renamed[partition] = dest
        return renamed

    def _update_block_files(self, compressed, uncompressed,
                            referenced_shas, tmp_files):
        """ INTERNAL: Implementation helper for update_blocks(). """
        # Hmmm... to appease pylint's max local vars constraint.
        #new_blocks = set(compressed) - set(uncompressed)
        old_blocks = set(compressed).intersection(set(uncompressed))
        #dropped_blocks = set(uncompressed) - old_blocks

        # Build new blocks in tmp files.
        new_files = self._make_new_files(set(compressed) - set(uncompressed),
                                         referenced_shas, tmp_files)
        # Delete the files for dropped blocks.
        self._remove_old_files(set(uncompressed) - old_blocks)
        # Move old blocks into tmp files.
        renamed = self._copy_old_blocks(old_blocks, tmp_files)

        new_tags = ['' for dummy in range(0, len(compressed))]
        new_indices = []
        ordinal_fixups = {}
        # Rename blocks onto new block ordinals.
        for index, block in enumerate(compressed):  # hmmm not a set???
            dest = self.full_path(index, False)
            assert not os.path.exists(dest)
            if block in set(compressed) - set(uncompressed):
                os.rename(new_files[block], dest)
                new_tags[index] = 'new'  # best we can do.
                new_indices.append(index)
                continue
            assert block in old_blocks
            os.rename(renamed[block], dest)
            # Copy the old tag value into the right position.
            new_tags[index] = self.tags[block[0]]
            # Save the info we need to fix the link_map.
            ordinal_fixups[block[0]] = index
        self.tags = new_tags
        return (new_tags, new_indices, ordinal_fixups)
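    # Added note (inferred from the asserts above, not original): the
    # 'uncompressed' and 'compressed' arguments appear to be lists of
    # (first_ordinal, last_ordinal) partitions over the block files,
    # where every entry in 'uncompressed' is a singleton. e.g.
    # repartitioning four blocks into two merged ones might look like:
    #
    #   uncompressed = [(0, 0), (1, 1), (2, 2), (3, 3)]  # current layout
    #   compressed   = [(0, 1), (2, 3)]                  # desired layout
    #
    # Partitions in both lists are moved as-is; partitions only in
    # 'compressed' are built with merge_blocks(); partitions only in
    # 'uncompressed' have their files dropped.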
    # REDFLAG: Failure.
    def update_blocks(self, uncompressed, compressed, referenced_shas,
                      min_blocks):
        """ Repartition the underlying block files into the partitions
            described by compressed. """
        assert not self.is_updating()
        assert not self.names.read_only
        tmp_files = []
        try:
            self.link_map.close()
            self.tags, new_indices, ordinal_fixups = \
                self._update_block_files(compressed, uncompressed,
                                         referenced_shas, tmp_files)
            # Drop links for unreferenced blocks and shift indices.
            # Then read links from new block files.
            self.link_map.update_blocks(
                ordinal_fixups,
                [self.full_path(index, False)
                 for index in range(0, self.total_blocks())],
                new_indices)
            # Add trailing zero length blocks.
            for index in range(self.nonzero_blocks(), min_blocks):
                out_file = open(self.full_path(index, False), 'wb')
                out_file.close()
        finally:
            for name in tmp_files:
                self.tmps.remove_temp_file(name)
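
# ----------------------------------------------------------------------
# Usage sketch added for illustration; it is NOT part of the original
# module. BlockNamesSketch is a hypothetical name policy exposing the
# read_path()/write_path()/read_only interface that full_path() and
# create() rely on. The 'tmps' argument is assumed to be the same
# temp-file manager (make_temp_file()/remove_temp_file()) BlockStorage
# already uses, and LinkMap etc. are assumed to be defined elsewhere
# in this module.

class BlockNamesSketch:
    """ HYPOTHETICAL: names blocks block_0, block_1, ... in one dir. """
    def __init__(self, base_dir, read_only=False):
        self.base_dir = base_dir
        self.read_only = read_only

    def read_path(self, ordinal):
        return os.path.join(self.base_dir, "block_%d" % ordinal)

    def write_path(self, ordinal):
        return self.read_path(ordinal)


def example_session(tmps, base_dir):
    """ HYPOTHETICAL: create a fresh archive of empty block files,
        then close it. """
    storage = BlockStorage(tmps)
    try:
        # create() writes four empty block files, then load() collapses
        # self.tags to length 1 because every block is empty.
        storage.create(BlockNamesSketch(base_dir), 4)
        assert storage.total_blocks() == 1
        return storage.nonzero_blocks()  # 0: all blocks are empty.
    finally:
        storage.close()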