def __init__(self,
                 context,
                 path,
                 pd_path=None,
                 name=None,
                 loglevel=logging.INFO,
                 **kwargs):
        self.context = context
        self.path = os.path.expanduser(path)
        if not name:
            name = context

        self.config = config.Config.instance()
        lazy_write = utils.get_interval(self.config, "LAZY WRITE", context)
        self.pd_filename = f".cb.{context}-lite.json.bz2"
        if pd_path:
            pd_file = f"{pd_path}/{self.pd_filename}"
        else:
            pd_file = f"{self.path}/{self.pd_filename}"
        super().__init__(pd_file, lazy_write=lazy_write)
        self.logger = logging.getLogger(logger_str(__class__) + " " + name)
        self.logger.setLevel(loglevel)
        self.ignored_suffixes = {}
        self.stat = stats.Statistic(buckets=(0, 5, 10, 30))
        self.report_timer = elapsed.ElapsedTimer()
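
This constructor resolves "LAZY WRITE" through utils.get_interval, while a later constructor in this listing composes utils.str_to_duration with config.get directly, which suggests get_interval is a thin wrapper over the two. Neither helper appears in these examples, so the following is only a hedged sketch inferred from the call sites:

import re

def str_to_duration(spec):
    # Hypothetical parser for duration strings like "30s", "5m", "24h";
    # bare numbers are treated as seconds.
    if isinstance(spec, (int, float)):
        return spec
    units = {"s": 1, "m": 60, "h": 3600, "d": 86400}
    match = re.fullmatch(r"(\d+)([smhd]?)", str(spec).strip())
    return int(match.group(1)) * units.get(match.group(2), 1)

def get_interval(config, option, context, default=5):
    # Inferred from the call sites above: look up a duration setting for a
    # context and normalize it to seconds.
    return str_to_duration(config.get(context, option, default))
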
 def run(self):
     self.bailing = False  # future use, to kill the Thread
     timer = elapsed.ElapsedTimer()
     for source_context in self.scanners:
         self.scanners[source_context].scan()
         self.logger.debug(
             f"scan complete, {len(self.scanners[source_context].keys())} files"
         )
     self.reinventory()
     while not self.bailing:
         self.config.load()
         # re-check this, in case config reloaded
         sleep_time = self.get_interval("rescan") // 2
         hysteresis = self.get_interval("hysteresis")
         self.logger.info(f"running")
         while self.step():
             if hysteresis: time.sleep(hysteresis)
             self.logger.debug(f"stepping again")
             if timer.once_every(sleep_time):
                 for source_context in self.scanners:
                     self.scanners[source_context].scan()
                 self.heartbeep()
                 self.reinventory()
                 # self.inform()
                 # self.inventory()
         self.audit()
         self.logger.info(f"sleeping {utils.duration_to_str(sleep_time)}")
         time.sleep(sleep_time)
    def run(self):
        self.bailing = False  # future use, to kill the Thread
        self.last_copy = "unknown"
        timer = elapsed.ElapsedTimer()
        while not self.bailing:
            self.audit()
            # TODO: honor different rescans per source
            if timer.once_every(self.rescan):
                self.run_all_scanners_once()
                self.reinventory()

            self.server_statuses = self.check_on_servers()
            self.logger.debug(f"JFYI: {self.server_statuses}")
            if self.full_p():
                self.logger.debug("I'm full (?) trying to drop")
                self.try_to_drop()
            elif "underserved" in self.server_statuses:
                self.try_to_copy()
            elif "available" in self.server_statuses and \
                self.last_copy != "not enough space":
                self.try_to_copy()
            else:
                sleep_time = self.rescan - timer.elapsed()
                sleep_msg = utils.duration_to_str(sleep_time)
                self.logger.info(f"sleeping {sleep_msg} til next rescan")
                time.sleep(sleep_time)
    def __init__(self, context, path, **kwargs):
        self.context = context
        self.path = os.path.expanduser(path)
        if "name" in kwargs:
            name = kwargs["name"]
        else:
            name = context

        if "checksums" in kwargs:
            self.checksums = kwargs['checksums']
        else:
            self.checksums = True

        self.config = config.Config.instance()
        self.pd_filename = f".cb.{context}.json.bz2"
        lazy_write = utils.str_to_duration(
            self.config.get(context, "LAZY WRITE", 5))
        super().__init__(f"{self.path}/{self.pd_filename}",
                         lazy_write=lazy_write,
                         **kwargs)
        self.logger = logging.getLogger(logger_str(__class__) + " " + name)
        # self.logger.setLevel(logging.INFO)
        self.ignored_suffixes = {}
        self.report_timer = elapsed.ElapsedTimer()
        self.stat = stats.Statistic(buckets=(0, 5, 10, 30))
 def auditor(self):
     timer = elapsed.ElapsedTimer()
     while True:
         time.sleep(15)
         self.logger.info("Servlet status update: ")
         for context, servlet in self.servlets.items():
             servlet.audit()
Example #6
 def auditor(self):
     timer = elapsed.ElapsedTimer()
     while True:
         time.sleep(15)
         self.logger.info(f"aggregate qps: {self.stats['handler'].qps()}")
         self.logger.info("Servlet status update: ")
         for context, servlet in self.servlets.items():
             self.logger.info(f"{context} qps: {self.stats[context].qps()}")
             servlet.audit()
Example #7
 def test_qps_lite(self):
     import elapsed
     timer = elapsed.ElapsedTimer()
     dir = "/users/austind/src/cb/test/source1"
     s = scanner.ScannerLite("test", dir, checksums=False)
     print("starting QPS test")
     s.scan()
     print(f"Total: {timer.elapsed():5.2f}s")
     os.remove(f"{dir}/.cb.test-lite.json.bz2")
 def start(self):
     while True:
         self.logger.info("Starting")
         self.config.load()
         sources = self.config.get_sources_for_host(self.hostname)
         print(f"sources: {sources}")
         if len(sources.items()) > 0:
             for context, source in sources.items():
                 self.logger.info(f"{context}: {source}")
                 gcm = GhettoClusterSource(context)
                 gcm.scan()
                 self.get_status(context, source, False)
             self.logger.info("sources are complete.")
         else:
             self.logger.info("source of None")
         replicas = self.config.get_replicas_for_host(self.hostname)
         if replicas:
             for context, replica in replicas.items():
                 self.logger.info(f"{context}: {replica}")
                 source = self.config.get_source_for_context(context)
                 dest = config.path_for(replica)
                 gcs = GhettoClusterReplica(context, replica, source)
                 # gcs.pull()
                 # gcs.scan()
                 puller = Thread(target=gcs.pull)
                 self.logger.info("Starting pull thread")
                 puller.start()
                 timer = elapsed.ElapsedTimer()
                 while puller.is_alive():
                     if timer.once_every(15):
                         scanner = Thread(target=gcs.scan)
                         self.logger.debug("Starting scan thread")
                         scanner.start()
                         scanner.join()
                         self.logger.debug("Scan thread finished")
                     else:
                         time.sleep(1)  # spin, but not hard
                 gcs.scan()
             self.logger.info("Replicas are complete")
         else:
             self.logger.info("replica to noone")
         try:
             signal.signal(signal.SIGHUP, self.wakeup)
             CYCLE = str_to_duration(self.config.getOption("CYCLE", "24h"))
             self.logger.info(f"Sleeping for {duration_to_str(CYCLE)}" + \
                                 f" in PID {os.getpid()}")
             self.logger.debug("send SIGHUP to wake up")
             time.sleep(CYCLE)
         except WakeupException:
             self.logger.warning("Restarting as requested (SIGHUP)")
             signal.signal(signal.SIGHUP, signal.SIG_DFL)
Example #9
 def __init__(self, filename, lazy_timer=0, **kwargs):
     self.masterFilename = filename
     self.transactionName = None
     self.data = {}
     self.logger = logging.getLogger(logger_str(__class__))
     self.lazy_timer = lazy_timer
     self.dirty = False
     self.read()
     self.clear_dirtybits()
     self.timer = elapsed.ElapsedTimer()
     if "metadata" in kwargs:
         self.metadata_key = kwargs["metadata"]
     else:
         self.metadata_key = "__metadata__"
 def test_timer(self):
     start = int(time.time())
     timer = elapsed.ElapsedTimer()
     self.assertEqual(timer.once_every(10), True)
     time.sleep(3)
     self.assertEqual(int(timer.elapsed()), 3)
     timer.reset()
     self.assertEqual(int(timer.elapsed()), 0)
     time.sleep(3)
     self.assertEqual(int(timer.elapsed()), 3)
     print(f"So far only {time.time() - start}s elapsed")
     self.assertEqual(timer.once_every(10), False)
     time.sleep(4)
     self.assertEqual(timer.once_every(10), True)
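
This test pins down the elapsed.ElapsedTimer contract: elapsed() reports seconds since construction or the last reset(), and once_every(n) returns True on the first call, then again only once n seconds have passed since it last returned True. A minimal sketch consistent with that behavior (an illustration, not the actual elapsed module):

import time

class ElapsedTimer:
    # Minimal sketch of the contract exercised by test_timer above.

    def __init__(self):
        self.start = time.time()
        self.last_fired = None  # once_every() has not fired yet

    def elapsed(self):
        # Seconds since construction or the last reset().
        return time.time() - self.start

    def reset(self):
        self.start = time.time()

    def once_every(self, interval):
        # True on the first call, then True again only after `interval`
        # seconds have elapsed since the last time it returned True.
        now = time.time()
        if self.last_fired is None or now - self.last_fired >= interval:
            self.last_fired = now
            return True
        return False
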
Example #11
 def run(self):
     self.bailout = False
     # pre-scan
     self.scanner.scan()
     self.logger.info("Ready to serve")
     self.handling = True
     while not self.bailout:
         timer = elapsed.ElapsedTimer()
         self.config.load()
         self.rescan = utils.get_interval(self.config, "rescan",
                                          self.context)
         self.scanner.scan()
         sleepy_time = max(self.rescan - timer.elapsed(), 10)
         sleep_msg = utils.duration_to_str(sleepy_time)
         self.logger.info(f"sleeping {sleep_msg} til next rescan")
         time.sleep(sleepy_time)
 def __init__(self, filename, loglevel=logging.INFO, *args, **kwargs):
     self.logger = logging.getLogger(logger_str(__class__))
     self.logger.setLevel(loglevel)
     self.masterFilename = filename
     self.args = args
     self.kwargs = kwargs
     self.lazy_timer = kwargs.get('lazy_write', 0)
     self.data = {}
     self.cls = kwargs.get('cls')
     self.lock = threading.RLock()
     self.read()
     self.clear_dirtybits()
     self.timer = elapsed.ElapsedTimer()
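
This constructor wires together a lock, a lazy_write interval, and an ElapsedTimer, which implies a deferred-flush write path. Only read() appears in these examples, so write() below is an assumption; a hedged sketch of what a mutator might look like:

def set(self, key, value):
    # Hypothetical mutator illustrating the lazy-write pattern implied by
    # the constructor above: update memory immediately, but only flush to
    # disk when the lazy timer fires (or on every write when lazy_timer is 0).
    with self.lock:
        self.data[key] = value
        if self.lazy_timer == 0 or self.timer.once_every(self.lazy_timer):
            self.write()  # assumed counterpart to self.read()
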
Example #13
    def __init__(self, context):
        super().__init__()
        self.context = context
        self.config = config.Config.instance()
        self.logger = logging.getLogger(logger_str(__class__) + " " + context)
        self.logger.info(f"Creating clientlet {self.context}")

        self.path = config.path_for(self.config.get(self.context, "backup"))
        assert os.path.exists(self.path), f"{self.path} does not exist!"

        # creates per-source scanners, random_source_list, claims
        self.build_sources()

        self.stats = stats.Stats()
        self.bailing = False
        self.datagrams = {}
        self.current_state = "startup"
        self.state_timer = elapsed.ElapsedTimer()
        self.states = {'startup': 0}
        self.efficiency = {}
Example #14
    def run(self):
        # startup tasks
        self.run_all_scanners_once()
        self.build_backups()
        self.unclaim_all()  # just once, on startup
        self.claim_everything()
        while not self.bailing:
            timer = elapsed.ElapsedTimer()
            self.update_allocation()
            self.run_all_scanners_once()
            self.crawl()
            self.audit()

            # TODO: per-source rescan intervals?
            rescan = get_interval(self.config, "rescan", self.context)
            # randomly run 2-4x per rescan interval
            rescan = random.randrange(rescan // 4, rescan // 2)
            sleep_time = max(rescan - timer.elapsed(), 10)
            sleep_msg = duration_to_str(sleep_time)
            self.logger.info(f"sleeping {sleep_msg} til next rescan")
            time.sleep(sleep_time)
Example #15
 def run(self, deleting=False):
     self.logger.info(f"Running, {self.context}:{self.path}")
     if deleting:
         self.logger.info(f"Deleting :|")
     puller = Thread(target=self.pull, args=(deleting, ))
     self.logger.debug("Starting pull thread")
     puller.start()
     timer = elapsed.ElapsedTimer()
     ignorals = self.config.get_ignorals(self.context)
     scn = scanner.Scanner(self.path, ignorals, self.states_filename)
     while puller.is_alive():
         if timer.once_every(300):
             scn.scan()
             self.push()
             self.get_status(brief=True)
         else:
             time.sleep(1)
     puller.join()
     scn.scan()
     self.push()
     self.logger.info(f"Finished: {self.context}:{self.path}")
Example #16
 def rsync_from_list(self, source_context, filename):
     self.logger.debug(f"rsync {source_context}: {filename}")
     verbose = self.config.get("global", "verbose", False)
     dryrun = self.config.get("global", "dryrun", False)
     timer = elapsed.ElapsedTimer()
     (n, size) = self.sizeof(source_context)
     source = self.sources[source_context]
     src_host = config.host_for(source)
     hostname = config.host_for(self.config.get(self.context, "backup"))
     if src_host == hostname:  # a local copy, just use path
         source = config.path_for(source)
     dest = f"{self.paths[source_context]}/"
     filesfrom = f"--files-from={filename}"
     # self.logger.debug(f"rsync --delete {source} {dest} --files-from={filename}")
     prefix = f"{self.context}:{source_context}"
     rsync_exit = rsync(source,
                        dest, (filesfrom, "-v", "--progress"),
                        prefix=prefix)
     bps = size / max(timer.elapsed(), 1e-6)  # guard against zero elapsed time
     self.logger.debug(
         f"rsync returned {rsync_exit}: {bytes_to_str(bps)}B/s effective")
     return rsync_exit
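
rsync_from_list delegates the actual transfer to an rsync() helper that is not shown in this listing. Judging from the call site (source, dest, a tuple of extra options, a logging prefix, and an integer exit status), a plausible subprocess-based sketch:

import subprocess

def rsync(source, dest, options=(), prefix=""):
    # Hypothetical sketch of the rsync() helper used above: shell out to
    # rsync(1) in archive mode with any extra options and return the
    # process exit code. `prefix` presumably tags per-context log output.
    command = ["rsync", "-a", *options, source, dest]
    proc = subprocess.run(command, text=True)
    return proc.returncode
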