def scan_hlink(self, ctx):
    """Detect whether this file is a hard link to an inode already seen
    during this scan; if so, adopt that inode's stored digest and level.

    Returns True when the file was recognized as a known hard link,
    False otherwise.
    """
    if os.name == 'nt':
        # Windows reports no inode numbers, so hard links cannot be
        # detected there.
        return False
    if self.stats[stat.ST_NLINK] == 1:
        # Only a single name refers to this inode: not a hard link.
        logging.debug("File %s has NLINK=1, can't be hard link", self.path())
        return False
    inode = self.stats[stat.ST_INO]
    if not ctx.inodes_db.has_key(inode):
        return False
    # Db record layout: data digest followed by a varlen-encoded level.
    record = ctx.inodes_db[inode]
    digest_size = Digest.dataDigestSize()
    self.digest = record[:digest_size]
    self.level = IntegerEncodings.binary_decode_int_varlen(record[digest_size:])
    return True
def restore(self, ctx):
    """
    Recreate the data from the information stored in the backup.
    """
    logging.info("Restoring " + self.path())
    #
    # Check if the file has already been processed
    # during this pass (i.e. it is a hard link to a restored inode).
    #
    if self.restore_hlink(ctx):
        return

    #
    # No, this file is new. Create it.
    #
    packer = PackerStream.PackerIStream(self.backup, self.digest,
                                        self.level)
    # Renamed from 'file' to avoid shadowing the builtin; closed in a
    # finally block so the handle is not leaked if a read/write raises.
    out = open(self.path(), "wb")
    try:
        for data in FileIO.read_blocks(packer, Digest.dataDigestSize()):
            out.write(data)
    finally:
        out.close()
    self.restore_stats()
def scan_prev(self, ctx, prev_num):
    """Decide whether this node is unchanged since the previous backup.

    prev_num is either None (then the node is looked up in the
    completed-nodes db by the digest of its path) or a
    (type, stat, digest, level) tuple taken from the previous increment.

    On a match, the previous stat/digest/level are reused on self and
    True is returned; otherwise the node is counted as changed and
    False is returned.
    """
    ctx.total_nodes += 1
    if prev_num is None:
        # No in-memory prev data: consult the completed-nodes db,
        # keyed by the digest of this node's path.
        cndb = self.backup.get_completed_nodes_db()
        path_digest = Digest.dataDigest(self.path().encode('utf8'))
        if cndb.has_key(path_digest):
            prev_data_is = StringIO.StringIO(cndb[path_digest])
            # Record layout: data digest, varlen level, varlen type,
            # then serialized stats.
            prev_digest = prev_data_is.read(Digest.dataDigestSize())
            prev_level = IntegerEncodings.binary_read_int_varlen(prev_data_is)
            prev_type = IntegerEncodings.binary_read_int_varlen(prev_data_is)
            #print "prev_stat_data->", base64.b64encode(prev_data_is.read())
            prev_stat = unserialize_stats(prev_data_is)
        else:
            ctx.changed_nodes += 1
            return False
    else:
        prev_type, prev_stat, prev_digest, prev_level = prev_num

    # The checks below are ordered cheapest-first; the first mismatch
    # marks the node as changed.
    changed = False
    if prev_type != self.get_type():
        logging.info("node type differs in the db")
        changed = True
    #elif (stat.S_IFMT(self.stats[stat.ST_MODE]) !=
          #stat.S_IFMT(prev_stat[stat.ST_MODE])):
        #print " Node type differs in the fs"
        #changed = True
    elif prev_stat is None:
        logging.info("Base stat not defined")
        changed = True
    elif self.stats[stat.ST_INO] != prev_stat[stat.ST_INO]:
        logging.info("Inode of %s differs: was %d, now %d" %
                     (self.path(), prev_stat[stat.ST_INO],
                      self.stats[stat.ST_INO]))
        changed = True
    elif self.stats[stat.ST_MTIME] != prev_stat[stat.ST_MTIME]:
        logging.info("Mtime of %s differs: %d != %d" %
                     (self.path(), self.stats[stat.ST_MTIME],
                      prev_stat[stat.ST_MTIME]))
        changed = True
    elif time.time() - self.stats[stat.ST_MTIME] <= 1.0:
        # The time from the last change is less than the resolution
        # of time() functions, so an identical mtime cannot prove the
        # file is unchanged -- treat it as changed to be safe.
        logging.info("File %s too recent %d : %d" %
                     (self.path(), prev_stat[stat.ST_MTIME], time.time()))
        changed = True
    else:
        #
        # OK, the prev node seems to be the same as this one.
        # Reuse it.
        #
        self.stats = prev_stat
        self.digest = prev_digest
        self.level = prev_level
        return True

    #print "changed node", self.path()
    ctx.changed_nodes += 1
    return False
def retrieve(self, stream):
    """
    Recreate the data from the information stored in the backup
    into the given stream.
    """
    logging.info("Retrieving file " + self.path())
    packer = PackerStream.PackerIStream(self.backup, self.digest,
                                        self.level)
    block_size = Digest.dataDigestSize()
    for chunk in FileIO.read_blocks(packer, block_size):
        stream.write(chunk)
def test(self, ctx):
    """
    Test that loading the data from the storages is successful.
    """
    logging.info("Testing " + self.path())
    packer = PackerStream.PackerIStream(self.backup, self.digest,
                                        self.level)
    # Pulling every block through the packer is the whole test; the
    # contents themselves are discarded.
    for _ in FileIO.read_blocks(packer, Digest.dataDigestSize()):
        pass
def read_directory_entries(self, file):
    """Generator over the serialized entries of a directory stream.

    Yields (type, name, stat, digest, level) tuples until the stream
    is exhausted.  Entries whose names are not valid UTF-8 are logged
    and skipped.
    """
    while True:
        node_type = Format.read_int(file)
        if node_type is None:
            # End of the directory stream.
            raise StopIteration
        node_name = Format.read_string(file)
        node_digest = file.read(Digest.dataDigestSize())
        node_level = IntegerEncodings.binary_read_int_varlen(file)
        node_stat = unserialize_stats(file)
        # Decode before yielding: the previous bare 'except' wrapped
        # the yield itself, so it also swallowed exceptions thrown in
        # by the consumer (including GeneratorExit on generator close).
        try:
            node_name_decoded = unicode(node_name, 'utf8')
        except UnicodeDecodeError:
            logging.info("Encountered bad file name in " + self.path())
            continue
        yield (node_type, node_name_decoded, node_stat,
               node_digest, node_level)
def write_block(self, data):
    """Store one block of data in the backup and record its digest,
    spilling the accumulated digest list into a recursive stream one
    level up once it no longer fits in a single block.
    """
    digest = Digest.dataDigest(data)
    if self.backup.add_block(digest, self.code, data):
        # The block was not stored before -- account for it.
        self.num_new_blocks += 1
        self.size_new_blocks += len(data)
    if self.rec_ostream is None:
        self.digests.append(digest)
        digests_size = Digest.dataDigestSize() * len(self.digests)
        if digests_size > self.backup.get_block_size():
            # Too many digests for one block: open a recursive packer
            # stream at the next level and replay everything collected.
            self.rec_ostream = PackerOStream(self.backup, self.code_packer,
                                             self.level + 1)
            for d in self.digests:
                self.rec_ostream.write(d)
            self.digests = None
    else:
        self.rec_ostream.write(digest)
def load_epoch_data(self):
    """Load the block longevity/epoch cache from the persistent db.

    NOTE: currently disabled by the unconditional return below -- the
    cache proved too resource-intensive to keep persistently.  The code
    after the return is intentionally kept for when it is re-enabled.
    """
    # So far, the cache is too resource-intensive.
    # Avoid keeping it persistently.
    return
    if not self.block_longevity_data.has_key("epoch"):
        self.epoch = 0
        return
    self.epoch = int(self.block_longevity_data["epoch"])
    longevity_os = StringIO.StringIO(self.block_longevity_data["data"])
    while True:
        # Each record: fixed-size digest, then two varlen-encoded ints
        # (longevity and epoch).
        digest = longevity_os.read(Digest.dataDigestSize())
        if len(digest) == 0:
            break
        longevity = IE.binary_read_int_varlen(longevity_os)
        epoch = IE.binary_read_int_varlen(longevity_os)
        self.block_longevity[digest] = longevity
        self.block_epoch[digest] = epoch
def scan(self, ctx, prev_num, exclusion_processor): """Scan the node, considering data in all the previous increments """ logging.debug("Scanning directory " + self.path()) self.compute_stats() ctx.num_visited_dirs_reporter.increment(1) # # Process data from previous increments. # ctx.total_nodes += 1 # prev data indexed by file, for directory scan prev_name_data = {} subdirs = [] # # Fetch prev information of this node # # Find the digest of prev node if it exists prev_digest = None if prev_num is not None: prev_type, prev_stat, prev_digest, prev_level = prev_num if prev_type != NODE_TYPE_DIR: prev_digest = None else: cndb = self.backup.get_completed_nodes_db() path_digest = Digest.dataDigest(self.path().encode('utf8')) if cndb.has_key(path_digest): prev_data_is = StringIO.StringIO(cndb[path_digest]) prev_digest = prev_data_is.read(Digest.dataDigestSize()) prev_level = IntegerEncodings.binary_read_int_varlen(prev_data_is) prev_type = IntegerEncodings.binary_read_int_varlen(prev_data_is) #print "prev_stat_data->", base64.b64encode(prev_data_is.read()) prev_stat = unserialize_stats(prev_data_is) if prev_type != self.get_type(): logging.debug("Node from cndb is not a directory!") prev_digest = None # Load the data of the prev node if prev_digest is not None: dir_stream = PackerStream.PackerIStream(self.backup, prev_digest, prev_level) for node_type, node_name, node_stat, node_digest, node_level in\ self.read_directory_entries(dir_stream): if node_type == NODE_TYPE_DIR: subdirs.append(node_name) prev_name_data[node_name] = ((node_type, node_stat, node_digest, node_level)) # # Scan the directory # exclusion_processor.filter_files() # Initialize scanning data self.children = [] num_children = len(exclusion_processor.get_included_files() + exclusion_processor.get_included_dirs()) processed_children = 0.0 # Scan the files in the directory for name in exclusion_processor.get_included_files(): path = os.path.join(self.path(), name) file_mode = os.lstat(path)[stat.ST_MODE] 
if prev_name_data.has_key(name): cur_prev = prev_name_data[name] else: cur_prev = None try: if stat.S_ISLNK(file_mode): node = Symlink(self.backup, self, name) node.scan(ctx, cur_prev) self.children.append(node) elif stat.S_ISREG(file_mode): node = File(self.backup, self, name) node.scan(ctx, cur_prev) self.children.append(node) else: ctx.unrecognized_files_reporter.append(path) logging.info("Ignoring unrecognized file type " + path) except OSError: logging.info("OSError accessing " + path) ctx.oserror_files_reporter.append(path) # traceback.print_exc() except IOError, (errno, strerror): logging.info("IOError %s accessing '%s' %s" % (errno, strerror, path)) ctx.ioerror_files_reporter.append(path) # traceback.print_exc() finally:
def next(self):
    """Iterator protocol: return the next digest from the block input
    stream, raising StopIteration at end of stream."""
    chunk = self.block_istream.read(Digest.dataDigestSize())
    if not chunk:
        raise StopIteration
    return chunk
def read_block(self):
    """Read the next digest from the digest stream and return the
    corresponding block's data, or "" once the stream is exhausted."""
    next_digest = self.digest_stream.read(Digest.dataDigestSize())
    if not next_digest:
        return ""
    return self.backup.load_block(next_digest)