def __init__(self, context, path, pd_path=None, name=None, loglevel=logging.INFO, **kwargs):
    """Set up a lite scanner: resolve paths, bind the persistent store,
    and wire up logging, stats, and the reporting timer.

    The persistent-dict file lives under pd_path when given, otherwise
    under the scanned path itself.
    """
    self.context = context
    self.path = os.path.expanduser(path)
    name = name or context
    self.config = config.Config.instance()
    lazy_write = utils.get_interval(self.config, "LAZY WRITE", context)
    self.pd_filename = f".cb.{context}-lite.json.bz2"
    # prefer an explicit pd_path; fall back to the scanned directory
    base = pd_path if pd_path else self.path
    super().__init__(f"{base}/{self.pd_filename}", lazy_write=lazy_write)
    self.logger = logging.getLogger(logger_str(__class__) + " " + name)
    self.logger.setLevel(loglevel)
    self.ignored_suffixes = {}
    self.stat = stats.Statistic(buckets=(0, 5, 10, 30))
    self.report_timer = elapsed.ElapsedTimer()
def run(self): self.bailing = False # future use, to kill the Thread timer = elapsed.ElapsedTimer() for source_context in self.scanners: self.scanners[source_context].scan() self.logger.debug( f"scan complete, {len(self.scanners[source_context].keys())} files" ) self.reinventory() while not self.bailing: self.config.load() # re-check this, in case config reloaded sleep_time = self.get_interval("rescan") // 2 hysteresis = self.get_interval("hysteresis") self.logger.info(f"running") while self.step(): if hysteresis: time.sleep(hysteresis) self.logger.debug(f"stepping again") if timer.once_every(sleep_time): for source_context in self.scanners: self.scanners[source_context].scan() self.heartbeep() self.reinventory() # self.inform() # self.inventory() self.audit() self.logger.info(f"sleeping {utils.duration_to_str(sleep_time)}") time.sleep(sleep_time)
def run(self):
    """Main loop: audit, periodically rescan, then either drop (when
    full) or copy (when peers need data); otherwise sleep out the
    remainder of the rescan interval.

    Fix: the idle-sleep duration is clamped with max(..., 10) — when
    the loop body outlasts the rescan interval, `self.rescan -
    timer.elapsed()` goes negative and time.sleep() would raise
    ValueError. The 10s floor matches the other run() loops in this
    codebase and also prevents a busy-spin.
    """
    self.bailing = False # future use, to kill the Thread
    self.last_copy = "unknown"
    timer = elapsed.ElapsedTimer()
    while not self.bailing:
        self.audit()
        # TODO: honor different rescans per source
        if timer.once_every(self.rescan):
            self.run_all_scanners_once()
            self.reinventory()
        self.server_statuses = self.check_on_servers()
        self.logger.debug(f"JFYI: {self.server_statuses}")
        if self.full_p():
            self.logger.debug("I'm full (?) trying to drop")
            self.try_to_drop()
        elif "underserved" in self.server_statuses:
            self.try_to_copy()
        elif "available" in self.server_statuses and \
                self.last_copy != "not enough space":
            self.try_to_copy()
        else:
            # clamp: elapsed() may exceed rescan, and sleep() rejects
            # negative values
            sleep_time = max(self.rescan - timer.elapsed(), 10)
            sleep_msg = utils.duration_to_str(sleep_time)
            self.logger.info(f"sleeping {sleep_msg} til next rescan")
            time.sleep(sleep_time)
def __init__(self, context, path, **kwargs):
    """Set up a full scanner: persistent store, logging, stats.

    kwargs: name (defaults to context), checksums (defaults to True),
    plus anything the persistent-dict superclass accepts.
    """
    self.context = context
    self.path = os.path.expanduser(path)
    name = kwargs.get("name", context)
    self.checksums = kwargs.get('checksums', True)
    self.config = config.Config.instance()
    self.pd_filename = f".cb.{context}.json.bz2"
    lazy_write = utils.str_to_duration(
        self.config.get(context, "LAZY WRITE", 5))
    super().__init__(f"{self.path}/{self.pd_filename}",
                     lazy_write=lazy_write, **kwargs)
    self.logger = logging.getLogger(logger_str(__class__) + " " + name)
    # self.logger.setLevel(logging.INFO)
    self.ignored_suffixes = {}
    self.report_timer = elapsed.ElapsedTimer()
    self.stat = stats.Statistic(buckets=(0, 5, 10, 30))
def auditor(self):
    """Every 15 seconds, log a status header and ask each servlet to
    audit itself. Runs forever (intended for a background thread).

    Fix: removed the unused local `timer = elapsed.ElapsedTimer()` —
    it was never read.
    """
    while True:
        time.sleep(15)
        self.logger.info("Servlet status update: ")
        for context, servlet in self.servlets.items():
            servlet.audit()
def auditor(self):
    """Every 15 seconds, log aggregate and per-context QPS, then ask
    each servlet to audit itself. Runs forever (background thread).

    Fix: removed the unused local `timer = elapsed.ElapsedTimer()` —
    it was never read.
    """
    while True:
        time.sleep(15)
        self.logger.info(f"aggregate qps: {self.stats['handler'].qps()}")
        self.logger.info("Servlet status update: ")
        for context, servlet in self.servlets.items():
            self.logger.info(f"{context} qps: {self.stats[context].qps()}")
            servlet.audit()
def test_qps_lite(self):
    """Rough throughput check: time a checksum-free ScannerLite scan
    of a fixed local tree, then clean up the state file it wrote.
    """
    import elapsed
    stopwatch = elapsed.ElapsedTimer()
    # NOTE: hard-coded developer path; test only runs on that machine
    source_dir = "/users/austind/src/cb/test/source1"
    s = scanner.ScannerLite("test", source_dir, checksums=False)
    print("starting QPS test")
    s.scan()
    print(f"Total: {stopwatch.elapsed():5.2f}s")
    os.remove(f"{source_dir}/.cb.test-lite.json.bz2")
def start(self):
    """Top-level daemon loop: each cycle, reload config, scan/report
    every source this host owns, pull+scan every replica it holds,
    then sleep for CYCLE (interruptible via SIGHUP -> WakeupException).

    Fix: `logger.warn` -> `logger.warning` — `warn` is a deprecated
    alias and has been removed in modern Python.
    """
    while True:
        self.logger.info("Starting")
        self.config.load()
        sources = self.config.get_sources_for_host(self.hostname)
        print(f"sources: {sources}")
        if len(sources.items()) > 0:
            # this host is a source: scan and publish status per context
            for context, source in sources.items():
                self.logger.info(f"{context}: {source}")
                gcm = GhettoClusterSource(context)
                gcm.scan()
                self.get_status(context, source, False)
            self.logger.info("sources are complete.")
        else:
            self.logger.info("source of None")
        replicas = self.config.get_replicas_for_host(self.hostname)
        if len(replicas.items()) > 0:
            # this host is a replica: pull in a thread, rescanning
            # every 15s while the pull runs
            for context, replica in replicas.items():
                self.logger.info(f"{context}: {replica}")
                source = self.config.get_source_for_context(context)
                dest = config.path_for(replica)
                gcs = GhettoClusterReplica(context, replica, source)
                # gcs.pull()
                # gcs.scan()
                puller = Thread(target=gcs.pull)
                self.logger.info("Starting pull thread")
                puller.start()
                timer = elapsed.ElapsedTimer()
                while puller.is_alive():
                    if timer.once_every(15):
                        scanner = Thread(target=gcs.scan)
                        self.logger.debug("Starting scan thread")
                        scanner.start()
                        scanner.join()
                        self.logger.debug("Scan thread finished")
                    else:
                        time.sleep(1) # spin, but not hard
                # final scan after the pull completes
                gcs.scan()
            self.logger.info("Replicas are complete")
        else:
            self.logger.info("replica to noone")
        try:
            # SIGHUP during the sleep raises WakeupException (see handler)
            signal.signal(signal.SIGHUP, self.wakeup)
            CYCLE = str_to_duration(self.config.getOption("CYCLE", "24h"))
            self.logger.info(f"Sleeping for {duration_to_str(CYCLE)}" + \
                             f" in PID {os.getpid()}")
            self.logger.debug("send SIGHUP to wake up")
            time.sleep(CYCLE)
        except WakeupException:
            self.logger.warning(f"Restarting as requested (SIGHUP)")
            signal.signal(signal.SIGHUP, signal.SIG_DFL)
def __init__(self, filename, lazy_timer=0, **kwargs):
    """Bind to *filename*, load existing state, and start clean
    (no dirty bits, fresh write timer).

    kwargs: metadata — key under which metadata is stored
    (defaults to "__metadata__").
    """
    self.masterFilename = filename
    self.transactionName = None
    self.data = {}
    self.logger = logging.getLogger(logger_str(__class__))
    self.lazy_timer = lazy_timer
    self.dirty = False
    self.read()
    self.clear_dirtybits()
    self.timer = elapsed.ElapsedTimer()
    self.metadata_key = kwargs.get("metadata", "__metadata__")
def test_timer(self):
    """Exercise ElapsedTimer: once_every() gating, elapsed() readings,
    and reset().

    Fix: `assertEquals` -> `assertEqual` — the former is a deprecated
    alias removed in Python 3.12.
    """
    start = int(time.time())
    timer = elapsed.ElapsedTimer()
    # first call always fires
    self.assertEqual(timer.once_every(10), True)
    time.sleep(3)
    self.assertEqual(int(timer.elapsed()), 3)
    timer.reset()
    self.assertEqual(int(timer.elapsed()), 0)
    time.sleep(3)
    self.assertEqual(int(timer.elapsed()), 3)
    print(f"So far only {time.time() - start}s elapsed")
    # only ~3s since once_every last fired: should not fire yet
    self.assertEqual(timer.once_every(10), False)
    time.sleep(4)
    self.assertEqual(timer.once_every(10), True)
def run(self):
    """Serve loop: pre-scan once, then rescan on the configured
    interval until self.bailout is set."""
    self.bailout = False
    self.scanner.scan()  # pre-scan
    self.logger.info("Ready to serve")
    self.handling = True
    while not self.bailout:
        cycle = elapsed.ElapsedTimer()
        self.config.load()
        self.rescan = utils.get_interval(self.config, "rescan", self.context)
        self.scanner.scan()
        # sleep out the rest of the interval; never less than 10s
        nap = max(self.rescan - cycle.elapsed(), 10)
        sleep_msg = utils.duration_to_str(nap)
        self.logger.info(f"sleeping {sleep_msg} til next rescan")
        time.sleep(nap)
def __init__(self, filename, loglevel=logging.INFO, *args, **kwargs):
    """Thread-safe persistent store bound to *filename*.

    kwargs: lazy_write — write-deferral interval (default 0);
    cls — optional class used when (de)serializing values.
    """
    self.logger = logging.getLogger(logger_str(__class__))
    self.logger.setLevel(loglevel)
    self.masterFilename = filename
    self.args = args
    self.kwargs = kwargs
    self.lazy_timer = kwargs.get('lazy_write', 0)
    self.data = {}
    self.cls = kwargs.get('cls', None)
    self.lock = threading.RLock()
    self.read()
    self.clear_dirtybits()
    self.timer = elapsed.ElapsedTimer()
def __init__(self, context):
    """Build a clientlet for *context*: resolve the backup path,
    create per-source scanners, and initialize state tracking.

    Raises FileNotFoundError if the configured backup path is missing.

    Fix: the backup-path check used `assert`, which is stripped under
    `python -O`; raise explicitly so a missing path always fails fast.
    """
    super().__init__()
    self.context = context
    self.config = config.Config.instance()
    self.logger = logging.getLogger(logger_str(__class__) + " " + context)
    self.logger.info(f"Creating clientlet {self.context}")
    self.path = config.path_for(self.config.get(self.context, "backup"))
    if not os.path.exists(self.path):
        raise FileNotFoundError(f"{self.path} does not exist!")
    # creates per-source scanners, random_source_list, claims
    self.build_sources()
    self.stats = stats.Stats()
    self.bailing = False
    self.datagrams = {}
    self.current_state = "startup"
    self.state_timer = elapsed.ElapsedTimer()
    self.states = {'startup': 0}
    self.efficiency = {}
def run(self):
    """Backup loop: one-time bootstrap (scan, build, unclaim, claim),
    then allocate/crawl/audit each cycle with a randomized rescan."""
    # startup tasks
    self.run_all_scanners_once()
    self.build_backups()
    self.unclaim_all() # just once, on startup
    self.claim_everything()
    while not self.bailing:
        cycle = elapsed.ElapsedTimer()
        self.update_allocation()
        self.run_all_scanners_once()
        self.crawl()
        self.audit()
        # TODO: per-source rescan intervals?
        interval = get_interval(self.config, "rescan", self.context)
        # randomly run 2-4x per rescan interval
        interval = random.randrange(interval // 4, interval // 2)
        nap = max(interval - cycle.elapsed(), 10)
        sleep_msg = duration_to_str(nap)
        self.logger.info(f"sleeping {sleep_msg} til next rescan")
        time.sleep(nap)
def run(self, deleting=False):
    """Pull from the source in a worker thread while rescanning and
    pushing status every 5 minutes; finish with one last scan+push."""
    self.logger.info(f"Running, {self.context}:{self.path}")
    if deleting:
        self.logger.info(f"Deleting :|")
    puller = Thread(target=self.pull, args=(deleting, ))
    self.logger.debug("Starting pull thread")
    puller.start()
    status_timer = elapsed.ElapsedTimer()
    ignorals = self.config.get_ignorals(self.context)
    local_scanner = scanner.Scanner(self.path, ignorals, self.states_filename)
    while puller.is_alive():
        if status_timer.once_every(300):
            local_scanner.scan()
            self.push()
            self.get_status(brief=True)
        else:
            time.sleep(1)  # idle poll while the pull runs
    puller.join()
    # one final scan + push once the pull has finished
    local_scanner.scan()
    self.push()
    self.logger.info(f"Finished: {self.context}:{self.path}")
def rsync_from_list(self, source_context, filename):
    """rsync the files listed in *filename* from source_context's
    source to its local destination path; returns the rsync exit code.

    Logs the effective throughput (bytes copied / wall time).
    """
    self.logger.debug(f"rsync {source_context}: (unknown)")
    # NOTE(review): verbose/dryrun are read but not used below — confirm
    # whether rsync() consults them or they are vestigial
    verbose = self.config.get("global", "verbose", False)
    dryrun = self.config.get("global", "dryrun", False)
    timer = elapsed.ElapsedTimer()
    (n, size) = self.sizeof(source_context)
    source = self.sources[source_context]
    src_host = config.host_for(source)
    hostname = config.host_for(self.config.get(self.context, "backup"))
    if src_host == hostname:
        # a local copy, just use path
        source = config.path_for(source)
    dest = f"{self.paths[source_context]}/"
    filesfrom = f"--files-from=(unknown)"
    # self.logger.debug(f"rsync --delete {source} {dest} --files-from=(unknown)")
    prefix = f"{self.context}:{source_context}"
    rsync_exit = rsync(source, dest, (filesfrom, "-v", "--progress"),
                       prefix=prefix)
    # NOTE(review): divides by elapsed wall time — could in principle be
    # zero for an instant return; verify ElapsedTimer never returns 0
    bps = size / timer.elapsed()
    self.logger.debug(
        f"rsync returned {rsync_exit}: {bytes_to_str(bps)}B/s effective")
    return rsync_exit