def _one_thread_one_directory_nolink(self, wts, dir_ri):
    '''
    Scan a single directory with hardlink tracking disabled.
    This is the fastpath counterpart of _one_thread_one_directory_withlink.

    Every entry read from dir_ri.path is added to the totals kept in
    wts.result (dr_count and dr_gb). Subdirectories are batched locally
    and appended to self._dirs_pending while holding self._lock. A regular
    file that the reader reports as not readable is logged and counted in
    self._error_count.

    Raises SimpleError when the directory cannot be opened or an entry
    cannot be read. KeyboardInterrupt and SystemExit propagate unchanged.
    '''
    log = self.logger
    rdr = self._reader
    subdirs = []

    try:
        dir_handle = rdr.opendir(dir_ri.path)
    except (KeyboardInterrupt, SystemExit):
        raise
    except BaseException as exc:
        exc_log(log, logging.DEBUG, str(wts))
        raise SimpleError(
            "%s: unable to open directory '%s': %s"
            % (wts, dir_ri.path, exc_info_err())) from exc

    while True:
        try:
            entry = rdr.getnextfromdir(dir_handle)
        except (KeyboardInterrupt, SystemExit):
            raise
        except BaseException as exc:
            exc_log(log, logging.DEBUG, str(wts))
            raise SimpleError(
                "%s: unable to read entry in directory '%s': %s"
                % (wts, dir_ri.path, exc_info_err())) from exc

        if not entry:
            # Nothing more to read: publish whatever is still batched and stop.
            with self._lock:
                self._dirs_pending.extend(subdirs)
            return

        if entry.ftype == Ftype.DIR:
            subdirs.append(entry)
            # Peek at self._busy_count and self._dirs_pending without the lock
            # as an optimization; the batch is also published once it reaches
            # 100 entries.
            if ((self._busy_count < self._thread_count)
                    or (not self._dirs_pending)
                    or (len(subdirs) >= 100)):
                with self._lock:
                    self._dirs_pending.extend(subdirs)
                subdirs = []

        wts.result.dr_count += 1
        wts.result.dr_gb += entry.ostat.st_size / Size.GB

        is_unreadable_file = (entry.ftype == Ftype.REG) and (not rdr.readable(entry))
        if is_unreadable_file:
            with self._lock:
                self.logger.error("cannot read '%s'", entry.path)
                self._error_count += 1
def obj_reconcile(wrock, tobj, content, desc, blob_name):
    '''
    Do the work to reconcile a target.

    tobj is the current database content with the backpointer map
    fully populated.
    content is the data payload of the object.
    desc is a description of the backing - eg 'blob' for Azure
    blob_name is handed to the decompress and dirents-repack helpers and
    attached to the "no dirents" error.

    The existing payload is parsed shallowly and re-emitted section by
    section: the VATTR and BACK sections are regenerated from tobj, the
    DIRENTS section comes from _obj_reconcile__repack_dirents, and any
    other section is copied through unchanged.

    Returns a tuple (payload, owner_id). payload is the new bytearray.
    For a directory, owner_id is the value produced while repacking the
    dirents; for anything else it is tobj.first_backpointer.

    Raises TargetObjectError when parsing fails or when a directory has no
    DIRENTS section. NamedObjectError and TerminalError raised during
    parsing propagate unchanged.
    '''
    try:
        decompressed = strip_compression_header_and_decompress(content, tobj, blob_name)
        parser = ParseState(tobj, decompressed)
        sections = parser.parse_shallow()
    except (NamedObjectError, TerminalError):
        raise
    except Exception:
        exc_log(wrock.logger, logging.ERROR, "inode %s parse failure", desc)
        raise TargetObjectError(
            tobj, "inode %s parse failure: %s" % (desc, exc_info_err()))

    # Regenerate the attribute and backpointer sections from the database state.
    vattr_payload = bytearray()
    unparse_attr(wrock.run_options, vattr_payload, tobj)

    backpointers = tobj.backpointer_list_generate(include_null_firstbackpointer=True)
    back_payload = bytearray()
    unparse_back(back_payload, backpointers)

    out = bytearray()
    owner_id = None
    for section in sections:
        obtype = section.obtype
        if obtype == CLFSObjHandleType.OBTYPE_VATTR:
            payload = vattr_payload
        elif obtype == CLFSObjHandleType.OBTYPE_BACK:
            payload = back_payload
        elif obtype == CLFSObjHandleType.OBTYPE_DIRENTS:
            payload, owner_id = _obj_reconcile__repack_dirents(
                wrock, tobj, section, blob_name)
        else:
            payload = section.data

        payload_len = len(payload)
        unpadded_len = payload_len + get_byte_count_header()
        # Round the section length up using the parser's round-up size and mask.
        padded_len = ((unpadded_len + ClfsObjParseBase.OBTYPE_ROUNDUP_SIZE - 1)
                      & ClfsObjParseBase.OBTYPE_ROUNDUP_MASK)
        pad_len = padded_len - unpadded_len

        unparse_header(out, obtype.value, payload_len, padded_len)
        out.extend(payload)
        out.extend(bytes(pad_len))

    if tobj.ftype == Ftype.DIR:
        if owner_id is None:
            raise TargetObjectError(tobj, "no dirents (internal error)",
                                    blob_name=blob_name)
    else:
        owner_id = tobj.first_backpointer
        if owner_id == FILEHANDLE_NULL:
            wrock.logger.warning(
                "%s reconcile %s appears to be orphaned with nlink_effective=%s",
                wrock, tobj.describe(), tobj.nlink_effective())

    return out, owner_id
def opendir(self, dirpath):
    '''
    See Reader

    Opens dirpath with os.scandir() and returns it wrapped in a
    _ScandirWrapper together with the path. When dirpath is None,
    self._src is scanned instead.

    Raises SourceObjectError if the directory cannot be opened.
    '''
    if dirpath is None:
        dirpath = self._src

    try:
        scan_iter = os.scandir(dirpath)
    except FileNotFoundError as exc:
        # A missing directory is reported via exc_info_name() rather than
        # the exc_info_err() text used for every other failure.
        raise SourceObjectError(
            dirpath, "cannot open directory: %s" % exc_info_name()) from exc
    except Exception as exc:
        raise SourceObjectError(
            dirpath, "cannot open directory: %s" % exc_info_err()) from exc

    return self._ScandirWrapper(scan_iter, dirpath)
def open_input_file(self, tobj):
    '''
    Open one input file (ftype REG) for reading

    tobj is either the source path as a str or an object exposing
    source_path_str. Returns whatever self.input_file_reader() returns
    for that path.

    Raises SourceObjectError if the file cannot be opened; a
    SourceObjectError raised by the reader itself propagates unchanged.
    '''
    source_path = tobj if isinstance(tobj, str) else tobj.source_path_str
    try:
        opened = self.input_file_reader(source_path)
    except SourceObjectError:
        raise
    except Exception as exc:
        raise SourceObjectError(
            source_path, "cannot open file: %s" % exc_info_err()) from exc
    return opened
def _fsync(self):
    '''
    Flush and fsync

    Does nothing when no file is attached (self._file is None); otherwise
    calls self._flush_real(). Any failure sets self._flush_failed, after
    which every later call fails immediately.

    Raises WriteFileError if a previous flush failed or if this flush
    raises an Exception. Other BaseException subclasses are logged and
    re-raised unchanged.
    '''
    if self._flush_failed:
        raise WriteFileError("%s flush failed previously" % self)

    if self._file is not None:
        try:
            self._flush_real()
        except BaseException as exc:
            self._flush_failed = True
            failure = "cannot flush %s: %s" % (self, exc_info_err())
            # The message is logged both before and after the stack text.
            self._logger.error("%s\n%s\n%s", failure, exc_stack(), failure)
            if not isinstance(exc, Exception):
                raise
            raise WriteFileError(failure) from exc
def readlink(tobj):
    '''
    Fetch the target of a symbolic link

    tobj is either the source path as a str or an object exposing
    source_path_str. Returns the result of os.readlink() for that path.

    Raises SourceObjectError if the link cannot be read. When the failure
    is an OSError and the path exists but is not a symbolic link, the
    error says so explicitly.
    '''
    source_path = tobj if isinstance(tobj, str) else tobj.source_path_str
    try:
        return os.readlink(source_path)
    except OSError as exc:
        # Distinguish "exists but is not a symlink" from other OS failures.
        if os.path.exists(source_path) and (not os.path.islink(source_path)):
            raise SourceObjectError(
                source_path, "source path is not a symbolic link") from exc
        raise SourceObjectError(
            source_path, "cannot read symbolic link: %s" % exc_info_err()) from exc
    except Exception as exc:
        raise SourceObjectError(
            source_path, "cannot read symbolic link: %s" % exc_info_err()) from exc
def getnextfromdir(self, directory):
    '''
    See Reader
    directory is returned from opendir() - a _ScandirWrapper

    Returns None once the underlying iterator (directory.sd) is exhausted.
    Otherwise the next entry is stat'ed and the result of
    self._readerinfo_from_stat(path, name, stat) is returned.

    Raises SourceObjectError if advancing the iterator fails (reported
    against directory.path) or if the stat fails (reported against the
    entry's path).
    '''
    try:
        entry = next(directory.sd)
    except StopIteration:
        return None
    except Exception as exc:
        detail = "cannot get next directory entry: %s" % exc_info_err()
        raise SourceObjectError(directory.path, detail) from exc

    try:
        stat_result = self._getnextfromdir__dostat(entry)
    except Exception as exc:
        detail = "cannot stat: %s %s" % (exc.__class__.__name__, exc)
        raise SourceObjectError(entry.path, detail) from exc

    return self._readerinfo_from_stat(entry.path, entry.name, stat_result)
def _one_thread_one_directory_withlink(self, wts, dir_ri):
    '''
    Do the work for a single directory; hardlink support enabled.

    Entries read from dir_ri.path are added to the totals kept in
    wts.result (dr_count and dr_gb). Subdirectories are batched locally
    and appended to self._dirs_pending while holding self._lock.

    A non-directory whose st_nlink is not 1 is tracked by inode in
    self._nlink_set (guarded by self._nlink_lock) so that later sightings
    of the same inode are skipped. Once the set holds self.MAX_NLINK
    inodes, new inodes are no longer recorded, a single warning is logged,
    and the counts may become inaccurate.

    Raises SimpleError when the directory cannot be opened or an entry
    cannot be read. KeyboardInterrupt and SystemExit propagate unchanged
    (a KeyboardInterrupt during the open is logged first).
    '''
    logger = self.logger
    reader = self._reader
    found_dirs = list()
    try:
        directory = reader.opendir(dir_ri.path)
    except (KeyboardInterrupt, SystemExit) as e:
        if isinstance(e, KeyboardInterrupt):
            logger.warning("KeyboardInterrupt %s", self._loc())
        raise
    except BaseException as e:
        exc_log(logger, logging.DEBUG, str(wts))
        # This is the open failure, so report it as such; the message
        # matches the one used by _one_thread_one_directory_nolink.
        raise SimpleError(
            "%s: unable to open directory '%s': %s"
            % (wts, dir_ri.path, exc_info_err())) from e
    while True:
        try:
            ri = reader.getnextfromdir(directory)
        except (KeyboardInterrupt, SystemExit):
            raise
        except BaseException as e:
            exc_log(logger, logging.DEBUG, str(wts))
            raise SimpleError(
                "%s: unable to read entry in directory '%s': %s"
                % (wts, dir_ri.path, exc_info_err())) from e
        if not ri:
            # Nothing more to read: publish whatever is still batched and stop.
            with self._lock:
                self._dirs_pending.extend(found_dirs)
            return
        if ri.ftype == Ftype.DIR:
            found_dirs.append(ri)
            # Peek at self._busy_count and self._dirs_pending without the lock as an optimization
            if (self._busy_count < self._thread_count) or (
                    not self._dirs_pending) or (len(found_dirs) >= 100):
                with self._lock:
                    self._dirs_pending.extend(found_dirs)
                found_dirs = list()
        elif ri.ostat.st_nlink != 1:
            # Non-directory with nlink != 1
            with self._nlink_lock:
                if ri.ostat.st_ino in self._nlink_set:
                    # Already counted this one
                    continue
                if len(self._nlink_set) >= self.MAX_NLINK:
                    # Tracking set is full: warn once and stop recording.
                    if not self._nlink_limit_warned:
                        self.logger.warning(
                            "reached nlink tracking limit %d; dry run counts may be inaccurate",
                            self.MAX_NLINK)
                        self._nlink_limit_warned = True
                else:
                    self._nlink_set.add(ri.ostat.st_ino)
            if ri.ostat.st_nlink > CLFS_LINK_MAX:
                # NOTE(review): presumably links beyond CLFS_LINK_MAX end up
                # as separate objects, hence the extra count - confirm.
                count = 1 + ri.ostat.st_nlink - CLFS_LINK_MAX
                wts.result.dr_count += count
                wts.result.dr_gb += count * (ri.ostat.st_size / Size.GB)
                continue
        wts.result.dr_count += 1
        wts.result.dr_gb += ri.ostat.st_size / Size.GB