import errno
import os
import queue
import shutil
import stat
import tarfile
import uuid
from contextlib import closing
from datetime import datetime

# Project-local names used below (SymlinkEntry, FileEntry, TreeEntry, tree,
# archiver, FileUploader, UPLOAD_DONE, log and its record classes, mkdir_p,
# files_to_consider, uukey_and_size, should_skip) are defined elsewhere in
# this codebase.


def thaw_symlink(entry, fullpath):
    assert isinstance(entry, SymlinkEntry)
    # TODO: windows?
    starget = entry.symlink_target
    try:
        os.symlink(starget, fullpath)
    except OSError as e:
        # A link that already exists is fine; anything else is re-raised
        if e.errno != errno.EEXIST or not os.path.islink(fullpath):
            raise
    log(ProcessFileResult('OK'))

def do_check(conf, old_tree, target_name):
    errors = []
    done_files = 0
    skipped_files = 0
    total_files = sum(1 for _ in files_to_consider(conf, target_name))
    for (full_path, target_path) in files_to_consider(conf, target_name):
        if os.path.islink(full_path):
            # TODO
            skipped_files += 1
            continue
        done_files += 1
        if done_files % 100 == 0:
            log(ProgressReport(done_files, total_files))
        mtime_dt = datetime.utcfromtimestamp(os.path.getmtime(full_path))
        check = True
        try:
            old_entry = old_tree.entries[target_path]
            if old_entry.last_hashed < mtime_dt:
                # File has been modified, can't check consistency
                check = False
        except KeyError:
            # Not in the tree at all; nothing to compare against
            check = False
        if not check:
            skipped_files += 1
            continue
        uukey, file_size = uukey_and_size(full_path)
        if uukey != old_entry.uuid:
            print("WARNING: hash mismatch! {}".format(target_path))
            errors.append(target_path)
    print()
    print("Finished consistency check:")
    print("{} files on disk ({} skipped), {} in tree".format(
        total_files, skipped_files, len(old_tree.entries)))
    print()
    if errors:
        print("{} errors:".format(len(errors)))
        for e in errors:
            print(e)
    else:
        print("No errors.")

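# uukey_and_size() is defined elsewhere in the codebase. A minimal sketch of
# what it plausibly does, assuming the "uukey" is a hex content digest of the
# file; the hash algorithm and block size here are assumptions, and only the
# (uukey, file_size) return shape is relied on above:
import hashlib


def _uukey_and_size_sketch(path):
    h = hashlib.sha512()
    size = 0
    with open(path, 'rb') as f:
        # Stream in 1 MiB blocks so large files never sit wholly in memory
        for block in iter(lambda: f.read(1 << 20), b''):
            h.update(block)
            size += len(block)
    return h.hexdigest(), size
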
# A method of the Archiver class (instantiated as archiver.Archiver in
# do_freeze below): packs small files into shared tar archives.
def add_file(self, full_path, uukey, file_size):
    if self.cb is None:
        raise NotImplementedError
    # Make sure that file_size is accurate
    assert file_size == os.path.getsize(full_path)
    if self.curr_archive is None or self.curr_archive_size >= self.archive_size:
        # Current pack is full (or none is open): seal it and start a new one
        self.finish_archive()
        self.curr_uuid = "P" + uuid.uuid4().hex[15:]
        self.curr_archive = tarfile.open(self._curr_archive_filepath(), mode="w")
        self.add_archive_info()
        log(PackStarted(self.curr_uuid))
    self.curr_archive.add(full_path, arcname=uukey)
    self.curr_archive_size += file_size
    return self.curr_uuid

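# finish_archive() is called above and in do_freeze() below but is not shown
# in this section. A minimal sketch inferred from those call sites (the real
# implementation may differ): it seals the open pack, hands it to the
# callback registered via set_callback() -- store_archive() in do_freeze --
# and resets state so the next add_file() starts a fresh pack.
def _finish_archive_sketch(self):
    if self.curr_archive is None:
        # Nothing open yet (first call, or already finished)
        return
    self.curr_archive.close()
    # The callback receives (pack uuid, path to the finished tar file)
    self.cb(self.curr_uuid, self._curr_archive_filepath())
    self.curr_archive = None
    self.curr_archive_size = 0
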
def thaw_file(conf, tree, entry, fullpath):
    assert isinstance(entry, FileEntry)
    # TODO: posix stuff
    utype = tree.uuid_type(entry.uuid)
    if utype == 'smallfile':
        # Small file: packed into a shared tar archive, extract this member
        pack_id = tree.file_pack[entry.uuid]
        data_cf = conf.st.load_archive(tree.uuid_to_storage[pack_id])
        archive = tarfile.open(data_cf.fullpath(), 'r')
        with closing(archive):
            inf = archive.extractfile(entry.uuid)
            with closing(inf), open(fullpath, 'wb') as outf:
                shutil.copyfileobj(inf, outf)
        log(ProcessFileResult('OK'))
    else:
        # Large file: stored as its own archive, copy it out directly
        storage_tag = tree.uuid_to_storage[entry.uuid]
        data_cf = conf.st.load_archive(storage_tag)
        with closing(data_cf):
            shutil.copy(data_cf.fullpath(), fullpath)
        log(ProcessFileResult('OK'))

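# The storage backend behind conf.st is not shown in this section. From the
# calls above, load_archive(storage_tag) fetches an archive and returns a
# handle exposing fullpath() and close(). A minimal local-directory sketch
# of that contract (class and attribute names here are assumptions):
class _DirStorageSketch(object):
    def __init__(self, basedir):
        self.basedir = basedir

    def load_archive(self, storage_tag):
        # Local storage has nothing to download; just hand back a handle
        # to the file named by the storage tag.
        return _LocalArchiveSketch(os.path.join(self.basedir, storage_tag))


class _LocalArchiveSketch(object):
    def __init__(self, path):
        self._path = path

    def fullpath(self):
        return self._path

    def close(self):
        # No temporary resources to release for a plain local file
        pass
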
def do_thaw(conf, tree, dest_path):
    """Restore every entry in `tree` under dest_path, pulling archives
    from the storage object on conf.st."""
    for (relpath, entry) in tree.entries.items():
        fullpath = os.path.join(dest_path, relpath.lstrip('/'))
        log(StartedProcessingFile(relpath, fullpath))
        dirname = os.path.dirname(fullpath)
        # Sanity check: never write outside the destination directory
        assert dest_path.rstrip('/') in dirname
        mkdir_p(dirname)
        # Check type of entry
        if entry.entry_type == TreeEntry.FILE:
            thaw_file(conf, tree, entry, fullpath)
        elif entry.entry_type == TreeEntry.DIR:
            # TODO: perms
            # mkdir_p rather than os.mkdir: the directory may already exist
            # if it was created above for one of its children
            mkdir_p(fullpath)
        elif entry.entry_type == TreeEntry.SYMLINK:
            thaw_symlink(entry, fullpath)
        else:
            raise NotImplementedError(
                "TreeEntry type not recognized: {}".format(entry.entry_type))

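# mkdir_p() is assumed to behave like `mkdir -p`: create the directory and
# any missing parents, and succeed quietly if it already exists. A minimal
# sketch, in the same errno style as thaw_symlink() above:
def _mkdir_p_sketch(path):
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST or not os.path.isdir(path):
            raise
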
def do_freeze(conf, old_tree, target_name):
    if not conf.has_option('targets', target_name):
        print("ERROR: target {} doesn't exist".format(target_name))
        return None
    dry_run = conf.getboolean('options', 'dry-run')
    new_tree = old_tree.copy()
    print("new_tree: {}".format(new_tree.entries))
    uploader = FileUploader(conf, conf.st)
    uploader.start()

    def store_file_small(full_path, uukey, target_path):
        # file_size comes from the enclosing loop, set just before each call
        pack_id = None
        if not dry_run:
            pack_id = ar.add_file(full_path, uukey, file_size)
        new_tree.file_pack[uukey] = pack_id

    def store_file_large(full_path, uukey, target_path):
        if not dry_run:
            # Uploads to Glacier in another thread
            uploader.store(full_path, uukey)
        new_tree.uuid_to_storage[uukey] = None

    def store_archive(ar_uuid, arpath):
        if not dry_run:
            uploader.store(arpath, ar_uuid)
        new_tree.uuid_to_storage[ar_uuid] = None

    def store_symlink(full_path, target_path):
        symlink_target = os.path.realpath(full_path)
        if symlink_target.startswith(root_path):
            # Links pointing inside the target become relative, so they
            # still resolve when thawed to a different destination
            symlink_target = os.path.relpath(symlink_target,
                                             os.path.dirname(full_path))
        new_tree.entries[target_path] = tree.SymlinkEntry(symlink_target)

    root_path = conf.get('targets', target_name)
    ar = archiver.Archiver(conf, target_name)
    ar.set_callback(store_archive)

    for (full_path, target_path) in files_to_consider(conf, target_name):
        log(StartedProcessingFile(target_path, full_path))
        try:
            sb = os.stat(full_path)
        except OSError as e:
            if e.errno == errno.ENOENT or e.errno == errno.EPERM:
                log(ProcessFileResult('Skip', 'Errno {}'.format(e.errno)))
                continue
            # Anything else would leave sb unbound below, so re-raise
            raise

        # Should we skip this file?
        skip_reason = should_skip(conf, sb, target_path, old_tree)
        if skip_reason:
            log(ProcessFileResult('Skip', skip_reason))
            continue

        # Symlinks
        if os.path.islink(full_path):
            store_symlink(full_path, target_path)
            log(ProcessFileResult('Symlink'))
            continue

        # Hash and check if data already stored
        uukey, file_size = uukey_and_size(full_path)
        if conf.getboolean('options', 'tree-only'):
            new_tree.files[target_path] = tree.TreeEntry(uukey, None)
            log(ProcessFileResult('{}'.format(uukey[:32])))
            continue

        # TODO: posix stuff
        new_tree.entries[target_path] = \
            tree.FileEntry(sb.st_uid, sb.st_gid, stat.S_IMODE(sb.st_mode),
                           uukey, datetime.utcnow())
        if not new_tree.is_stored(uukey):
            if file_size <= conf.getint('options', 'filesize-limit'):
                store_file_small(full_path, uukey, target_path)
            else:
                store_file_large(full_path, uukey, target_path)
            log(ProcessFileResult('{}'.format(uukey[:32])))
        else:
            log(ProcessFileResult('Skip', 'Already stored'))

    # Seal the final pack and tell the uploader no more work is coming
    ar.finish_archive()
    uploader.to_store.put(UPLOAD_DONE)

    # Progress indicator
    while True:
        num_processed = uploader.progress.get()
        log(ProgressReport(num_processed, uploader.num_requested))
        if num_processed == uploader.num_requested:
            break
    uploader.join()

    # Resolve archive IDs
    try:
        for i in range(uploader.num_requested):
            (ar_uuid, storageid) = uploader.done.get(False)
            new_tree.uuid_to_storage[ar_uuid] = storageid
    except queue.Empty:
        print("ERROR: not enough processed files")
    if not uploader.done.empty():
        print("ERROR: some files unaccounted!")

    # Check to see that all UUIDs in tree have storage tag
    for ar_uuid, stag in new_tree.uuid_to_storage.items():
        if stag is None:
            print("ERROR: uuid {} doesn't have storage tag".format(ar_uuid))

    # Store the new tree
    return new_tree

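# files_to_consider() drives both do_freeze() and do_check() but is defined
# elsewhere. A minimal sketch, assuming it walks the target's root directory
# and yields (full_path, target_path) pairs; the leading '/' on target_path
# is inferred from relpath.lstrip('/') in do_thaw() and may not match the
# real implementation (which likely also applies exclude rules from conf):
def _files_to_consider_sketch(conf, target_name):
    root_path = conf.get('targets', target_name)
    for dirpath, dirnames, filenames in os.walk(root_path):
        for name in filenames:
            full_path = os.path.join(dirpath, name)
            yield (full_path, '/' + os.path.relpath(full_path, root_path))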