def run(self):
    '''
    Do the dry run. Return the DryRunResult.

    Starts the worker threads, waits until no directories are pending
    and all workers are idle, then waits for every worker to finish and
    aggregates the per-thread results. Raises SystemExit(1) if any
    worker recorded an error.
    '''
    logger = self.logger
    with self._lock:
        try:
            for wts in self._thread_states:
                wts.start()
            while self.should_run:
                if (not self._dirs_pending) and (not self._busy_count):
                    # Nothing pending and all threads are idle
                    self._stop_NL()
                    self._dir_cond.notify_all()
                    break
                self._run_cond.wait(timeout=1.0)
            # Wait for every worker thread to check in as done.
            while self._done_count < self._thread_count:
                self._run_cond.wait(timeout=1.0)
            if self._error_count > 0:
                logger.error("Dry run failed with error count %u", self._error_count)
                raise SystemExit(1)
            result = DryRunResult(count=1, gb=self._root_ri_gb)
            for wts in self._thread_states:
                result.add(wts.result)
            self.result = result
            return result
        except (KeyboardInterrupt, SystemExit) as e:
            logger.warning("%s %s", e.__class__.__name__, self._loc())
            self._abort_NL()
            # Re-raise so the SystemExit(1) raised above (or an interrupt)
            # actually propagates; previously it was swallowed here and the
            # caller received None instead of a DryRunResult.
            raise
        except BaseException:
            # Was a bare "except:" that logged str(wts); wts is unbound
            # (NameError) when the failure happens before the first worker
            # thread is started, masking the original error. Log the
            # location instead and abort; preserve the original best-effort
            # behavior of not re-raising here.
            exc_log(logger, logging.ERROR, self._loc())
            self._abort_NL()
def _one_thread_run(self, wts):
    '''
    Execute the work for a single worker thread.

    Thin wrapper around _one_thread_run_inner() that handles the
    high-level thread mechanics: translating interrupts and unexpected
    errors into SystemExit, aborting the run on failure, and always
    recording completion so waiters wake up.
    '''
    logger = self.logger
    try:
        self._one_thread_run_inner(wts)
    except KeyboardInterrupt as err:
        logger.warning("KeyboardInterrupt %s", self._loc())
        self._abort()
        raise SystemExit(1) from err
    except SystemExit:
        # Deliberate exit: abort the run but keep the original status.
        self._abort()
        raise
    except BaseException as err:
        # Anything else is unexpected: log it and convert to SystemExit.
        exc_log(logger, logging.ERROR, str(wts))
        self._abort()
        raise SystemExit(1) from err
    finally:
        # Always count this thread as done and wake anyone waiting,
        # regardless of how the work ended.
        with self._lock:
            self._done_count += 1
            self._dir_cond.notify_all()
            self._run_cond.notify_all()
def readAheadThread(self, logger):
    '''
    Thread for performing the read-ahead (producer).
    This requires some work if we want more than one read-ahead
    thread to ensure that we append to the queue in the proper order.
    '''
    try:
        # Bail out early if the consumer already shut us down;
        # otherwise mark ourselves active under the lock.
        with self._lock:
            if not self._run:
                return
            self._active = True
        try:
            while self._run:
                chunk = self._file_reader.read(self._read_length)
                if not chunk:
                    break  # done
                self._segments.put(chunk, block=True)
                self._read_length = CLFSSegment.OTHER_SEGMENT_BYTES
        except BaseException as err:
            exc_log(logger, logging.ERROR, self.__class__.__name__)
            self.error = err
        if self._run:
            # Enqueue an empty sentinel so the consumer sees EOF.
            try:
                self._segments.put(bytes())
            except BaseException as err:
                exc_log(logger, logging.ERROR, self.__class__.__name__)
                if self.error is None:
                    self.error = err
    finally:
        # Mark inactive and wake any waiters, no matter how we exited.
        with self._lock:
            self._active = False
            self._cond.notify_all()
def obj_reconcile(wrock, tobj, content, desc, blob_name):
    '''
    Do the work to reconcile a target.
    tobj is the current database content with the backpointer map fully populated.
    content is the data payload of the object.
    desc is a description of the backing - eg 'blob' for Azure
    Return the new data payload for the object.
    '''
    try:
        ba = strip_compression_header_and_decompress(content, tobj, blob_name)
        parse_state = ParseState(tobj, ba)
        oblobs = parse_state.parse_shallow()
    except (NamedObjectError, TerminalError):
        raise
    except Exception:
        # Pre-format the message: other exc_log() call sites in this file
        # pass a single message string, not a format string plus args.
        exc_log(wrock.logger, logging.ERROR, "inode %s parse failure" % desc)
        raise TargetObjectError(
            tobj, "inode %s parse failure: %s" % (desc, exc_info_err()))
    # Regenerate the vattr and backpointer payloads from tobj.
    vattr_bytes = bytearray()
    unparse_attr(wrock.run_options, vattr_bytes, tobj)
    backpointers = tobj.backpointer_list_generate(
        include_null_firstbackpointer=True)
    backpointer_bytes = bytearray()
    unparse_back(backpointer_bytes, backpointers)
    # Hoist loop invariants out of the per-oblob loop.
    header_bytes = get_byte_count_header()
    roundup_size = ClfsObjParseBase.OBTYPE_ROUNDUP_SIZE
    roundup_mask = ClfsObjParseBase.OBTYPE_ROUNDUP_MASK
    ba = bytearray()
    owner_id = None
    for oblob in oblobs:
        obtype = oblob.obtype
        if obtype == CLFSObjHandleType.OBTYPE_VATTR:
            data = vattr_bytes
        elif obtype == CLFSObjHandleType.OBTYPE_BACK:
            data = backpointer_bytes
        elif obtype == CLFSObjHandleType.OBTYPE_DIRENTS:
            data, owner_id = _obj_reconcile__repack_dirents(
                wrock, tobj, oblob, blob_name)
        else:
            # Unrecognized section: pass the original payload through.
            data = oblob.data
        # snake_case locals (was realCount / blobCountRaw / blobCount);
        # round the stored size up to the obtype alignment boundary.
        real_count = len(data)
        blob_count_raw = real_count + header_bytes
        blob_count = (blob_count_raw + roundup_size - 1) & roundup_mask
        padding = blob_count - blob_count_raw
        unparse_header(ba, obtype.value, real_count, blob_count)
        ba.extend(data)
        ba.extend(bytes(padding))
    if tobj.ftype == Ftype.DIR:
        # Directories derive their owner from the repacked dirents.
        if owner_id is None:
            raise TargetObjectError(tobj, "no dirents (internal error)",
                                    blob_name=blob_name)
    else:
        owner_id = tobj.first_backpointer
        if owner_id == FILEHANDLE_NULL:
            wrock.logger.warning(
                "%s reconcile %s appears to be orphaned with nlink_effective=%s",
                wrock, tobj.describe(), tobj.nlink_effective())
    return ba, owner_id
def _put_best_effort(self, item):
    '''
    Make a best-effort attempt to enqueue item without blocking.
    A failure to enqueue (e.g. a full queue) is logged at WARNING
    and otherwise ignored.
    '''
    try:
        self.pq.put(item, block=False, timeout=0.0)
    except Exception:
        # Log the caller's frame so the warning identifies the origin.
        exc_log(self.logger, logging.WARNING, getframe(1))
def flush_write(self):
    '''
    Flush pending writes.
    This is best-effort: failures are logged at WARNING and suppressed.
    '''
    try:
        with self._lock:
            self._write_queue_NL()
    except Exception:
        # Was a bare "except:", which also silently swallowed
        # KeyboardInterrupt and SystemExit; narrowed so shutdown
        # signals propagate while ordinary errors stay best-effort.
        exc_log(self.logger, logging.WARNING, "%s: flush_write" % self._wps)
def readerinfo_from_source_path(self, source_path, source_name):
    'See Reader'
    try:
        stat_result = os.lstat(source_path)
    except FileNotFoundError as err:
        # Missing file: wrap directly, no need for a debug log.
        raise SourceObjectError(source_path,
                                "cannot stat: %s" % exc_info_name()) from err
    except Exception as err:
        # Any other stat failure: log at DEBUG, then wrap.
        msg = "cannot stat: %s %s" % (err.__class__.__name__, err)
        exc_log(self.logger, logging.DEBUG, msg)
        raise SourceObjectError(source_path, msg) from err
    return self._readerinfo_from_stat(source_path, source_name, stat_result)
def _one_thread_one_directory_nolink(self, wts, dir_ri):
    '''
    Do the work for a single directory; hardlink support disabled
    This is a fastpath version of _one_thread_one_directory_withlink
    '''
    logger = self.logger
    reader = self._reader
    pending_subdirs = []
    try:
        directory = reader.opendir(dir_ri.path)
    except (KeyboardInterrupt, SystemExit):
        raise
    except BaseException as err:
        exc_log(logger, logging.DEBUG, str(wts))
        raise SimpleError("%s: unable to open directory '%s': %s"
                          % (wts, dir_ri.path, exc_info_err())) from err
    while True:
        try:
            ri = reader.getnextfromdir(directory)
        except (KeyboardInterrupt, SystemExit):
            raise
        except BaseException as err:
            exc_log(logger, logging.DEBUG, str(wts))
            raise SimpleError(
                "%s: unable to read entry in directory '%s': %s"
                % (wts, dir_ri.path, exc_info_err())) from err
        if not ri:
            # End of directory: hand off any subdirs we are still holding.
            with self._lock:
                self._dirs_pending.extend(pending_subdirs)
            return
        if ri.ftype == Ftype.DIR:
            pending_subdirs.append(ri)
            # Peek at self._busy_count and self._dirs_pending without the lock as an optimization
            if (self._busy_count < self._thread_count) or (
                    not self._dirs_pending) or (len(pending_subdirs) >= 100):
                with self._lock:
                    self._dirs_pending.extend(pending_subdirs)
                pending_subdirs = []
        # Every entry counts toward the dry-run totals.
        wts.result.dr_count += 1
        wts.result.dr_gb += ri.ostat.st_size / Size.GB
        if (ri.ftype == Ftype.REG) and (not reader.readable(ri)):
            with self._lock:
                self.logger.error("cannot read '%s'", ri.path)
                self._error_count += 1
def _one_thread_one_directory_withlink(self, wts, dir_ri):
    '''
    Do the work for a single directory; hardlink support enabled.

    Non-directory entries with st_nlink != 1 are deduplicated by inode
    via self._nlink_set so each hardlinked file is counted once; the
    set is capped at self.MAX_NLINK to bound memory.
    '''
    logger = self.logger
    reader = self._reader
    found_dirs = list()
    try:
        directory = reader.opendir(dir_ri.path)
    except (KeyboardInterrupt, SystemExit) as e:
        if isinstance(e, KeyboardInterrupt):
            logger.warning("KeyboardInterrupt %s", self._loc())
        raise
    except BaseException as e:
        exc_log(logger, logging.DEBUG, str(wts))
        # Fixed copy-pasted message: this is the opendir failure, not a
        # read-entry failure (matches _one_thread_one_directory_nolink).
        raise SimpleError("%s: unable to open directory '%s': %s"
                          % (wts, dir_ri.path, exc_info_err())) from e
    while True:
        try:
            ri = reader.getnextfromdir(directory)
        except (KeyboardInterrupt, SystemExit):
            raise
        except BaseException as e:
            exc_log(logger, logging.DEBUG, str(wts))
            raise SimpleError(
                "%s: unable to read entry in directory '%s': %s"
                % (wts, dir_ri.path, exc_info_err())) from e
        if not ri:
            # End of directory: hand off any remaining subdirs.
            with self._lock:
                self._dirs_pending.extend(found_dirs)
            return
        if ri.ftype == Ftype.DIR:
            found_dirs.append(ri)
            # Peek at self._busy_count and self._dirs_pending without the lock as an optimization
            if (self._busy_count < self._thread_count) or (
                    not self._dirs_pending) or (len(found_dirs) >= 100):
                with self._lock:
                    self._dirs_pending.extend(found_dirs)
                found_dirs = list()
        elif ri.ostat.st_nlink != 1:
            # Non-directory with nlink != 1
            with self._nlink_lock:
                if ri.ostat.st_ino in self._nlink_set:
                    # Already counted this one
                    continue
                if len(self._nlink_set) >= self.MAX_NLINK:
                    if not self._nlink_limit_warned:
                        self.logger.warning(
                            "reached nlink tracking limit %d; dry run counts may be inaccurate",
                            self.MAX_NLINK)
                        self._nlink_limit_warned = True
                else:
                    self._nlink_set.add(ri.ostat.st_ino)
                if ri.ostat.st_nlink > CLFS_LINK_MAX:
                    # The target splits over-linked files; account for the
                    # extra copies beyond CLFS_LINK_MAX.
                    count = 1 + ri.ostat.st_nlink - CLFS_LINK_MAX
                    wts.result.dr_count += count
                    wts.result.dr_gb += count * (ri.ostat.st_size / Size.GB)
                    continue
        wts.result.dr_count += 1
        wts.result.dr_gb += ri.ostat.st_size / Size.GB