def pull(self):
    self.logger.debug(f"Pulling {self.dest} from {self.source}")
    args = []
    # TODO: parse out multiple options
    options = self.config.getConfig(self.context, "rsync options")
    if options is not None:
        for option in options:
            args.append(f"--{option}")
    # n.b. --delete-excluded would screw with replicas; don't add it
    ignorals = self.build_ignorals()
    if len(ignorals) > 0:
        args += [f"--exclude={item}" for item in ignorals]
    if self.testing:
        # test mode: strip out hostnames for the rsync
        source = config.path_for(self.source)
    else:
        source = self.source
    dest = config.path_for(self.dest)
    self.logger.info("Starting rsync ...")
    file_state.rsync(source, dest, args)

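# Illustrative sketch (not part of the module): how pull() turns config
# options and ignorals into an rsync argument list. The option names and
# exclude patterns below are hypothetical; real values come from the
# config file and build_ignorals().
def demo_build_args():
    options = ["delete", "compress"]      # hypothetical "rsync options" value
    ignorals = [".gc", "*.tmp"]           # hypothetical build_ignorals() result
    args = [f"--{option}" for option in options]
    args += [f"--exclude={item}" for item in ignorals]
    return args   # ['--delete', '--compress', '--exclude=.gc', '--exclude=*.tmp']
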
def __init__(self, context):
    super().__init__()
    self.context = context
    self.logger = logging.getLogger(utils.logger_str(__class__)
                                    + " " + context)
    self.config = config.Config.instance()
    self.copies = int(self.config.get(self.context, "copies", 2))
    self.path = config.path_for(self.config.get(self.context, "source"))
    self.scanner = scanner.Scanner(self.context, self.path)
    lazy_write = utils.str_to_duration(
        self.config.get(context, "LAZY WRITE", 5))
    # TODO: support expiration
    self.rescan = utils.str_to_duration(
        self.config.get(self.context, "rescan"))
    self.clients = persistent_dict.PersistentDict(
        f"/tmp/cb.s{context}.json.bz2", lazy_write=lazy_write,
        cls=lock.Lock, expiry=self.rescan)
    self.drains = elapsed.ExpiringDict(300)  # NOT persistent!
    self.locks = locker.Locker(5)
    # TODO: timers should relate to a configurable cycle time
    self.bailout = False
    self.stats = {'claims': 0, 'drops': 0}
    self.handling = False

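# Rough sketch of the PersistentDict idea used above, for orientation only;
# the real class (persistent_dict.PersistentDict) also supports bz2 files,
# cls= wrapping, and expiry=. The core trick: a dict that flushes itself to
# disk at most once per lazy_write seconds instead of on every mutation.
import json
import time

class LazyWriteDict(dict):
    def __init__(self, path, lazy_write=5):
        super().__init__()
        self.path = path
        self.lazy_write = lazy_write
        self.last_write = 0.0

    def __setitem__(self, key, value):
        super().__setitem__(key, value)
        now = time.monotonic()
        if now - self.last_write >= self.lazy_write:
            with open(self.path, "w") as f:
                json.dump(self, f)      # persist the whole dict
            self.last_write = now
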
def retrieve(self, source_context, filename, counter=0):
    self.logger.debug(f"retrieving {source_context}:{filename} to "
                      f"{self.path}/{source_context}/{filename}")
    # 0: do I have it?
    if self.scanners[source_context].contains_p(filename):
        self.logger.debug(f"I already have {filename}")
        # just send the one; inform() will handle the rest
        self.claim(source_context, filename, dropping=True)
        return
    # 1: build the filenames (full path) for source + dest
    source = self.config.get(source_context, "source") + "/" + filename
    src_host = config.host_for(source)
    hostname = config.host_for(self.config.get(self.context, "backup"))
    if src_host == hostname:
        # a local copy, just use path
        source = config.path_for(source)
    dest_path = f"{self.path}/{source_context}"
    dest = f"{dest_path}/{filename}"
    # 2: make the transfer
    self.logger.debug(f"rsync {source} {dest}")
    self.makedirs(dest)
    rsync_stat = file_state.rsync(source, dest)
    self.logger.debug(f"rsync returned {rsync_stat}")
    if rsync_stat == 0:
        # 3: record it
        self.claim(source_context, filename, dropping=True)  # no retry
    else:
        self.logger.error("Failed to rsync???")
        raise FileNotFoundError(filename)

def __init__(self, context):
    super().__init__()
    self.context = context
    self.config = config.Config.instance()
    self.logger = logging.getLogger(logger_str(__class__) + " " + context)
    self.logger.info(f"Creating clientlet {self.context}")
    self.path = config.path_for(self.config.get(self.context, "backup"))
    assert os.path.exists(self.path), f"{self.path} does not exist!"
    # ALL source contexts (we care a lot)
    self.sources = {}
    self.scanners = {}
    self.random_source_list = []
    self.build_sources()
    lazy_write = utils.str_to_duration(
        self.config.get(context, "LAZY WRITE", 5))
    # TODO: my cache of claims should expire in rescan/2
    self.rescan = self.get_interval("rescan") // 2
    self.claims = PersistentDict(f"/tmp/cb.c{context}.json.bz2",
                                 lazy_write=lazy_write, expiry=self.rescan)
    self.drops = 0  # count the number of times I drop a file
    self.stats = stats.Stats()
    self.update_allocation()
    self.bailing = False
    self.datagrams = {}

def retrieve(self, source_context, filename):
    self.logger.debug(f"retrieving {source_context}:{filename}"
                      f" to {self.path}/{source_context}/{filename}")
    # 0: do I have it?
    if self.scanners[source_context].contains_p(filename):
        claimed = self.claim(source_context, filename, dropping=True)
        self.logger.debug(
            f"I already have {filename}; claimed = {claimed}")
        if claimed in ("ack", "keep"):
            return claimed
        else:
            self.logger.debug("Something's wrong, trying again")
    # 1: build the filenames (full path) for source + dest
    source = self.config.get(source_context, "source") + "/" + filename
    src_host = config.host_for(source)
    hostname = config.host_for(self.config.get(self.context, "backup"))
    if src_host == hostname:
        # a local copy, just use path
        source = config.path_for(source)
    dest = f"{self.path}/{source_context}/{filename}"
    # 2: make the transfer
    self.logger.debug(f"rsync {source} {dest}")
    self.makedirs(dest)
    rsync_stat = file_state.rsync(source, dest)
    self.logger.debug(f"rsync returned {rsync_stat}")
    if rsync_stat == 0:
        # 3: record it
        self.claim(source_context, filename, dropping=True)
    else:
        self.logger.error("Failed to rsync???")
        raise FileNotFoundError(filename)

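# The makedirs() helper called above isn't shown in this section; a
# plausible minimal version (hypothetical, for orientation) would just
# ensure the destination file's parent directory exists before rsync runs:
import os

def makedirs(self, dest):
    os.makedirs(os.path.dirname(dest), exist_ok=True)
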
def __init__(self, context):
    super().__init__(context)
    self.source = self.config.get_source_for_context(context)
    self.path = config.path_for(self.source)
    persistent_dict_file = (f"{self.path}/.ghetto_cluster/"
                            f"source.{context}.json")
    self.states = persistent_dict.PersistentDict(
        persistent_dict_file,
        self.config.getOption("LAZY_WRITE", 5))
    self.logger = logging.getLogger(logger_str(__class__))

def test_config(self):
    cfg = config.Config.instance()
    cfg.init("test-config.txt", "source", "backup", hostname="localhost")
    contexts = list(cfg.get_contexts_for_key("source").keys())
    print(contexts)
    context = contexts[1]
    path = config.path_for(cfg.get(context, "source"))
    print(path)
    s = scanner.Scanner(context, path)
    s.scan()
    filename = list(s.keys())[0]
    self.assertTrue(os.path.exists(f"{path}/{filename}"))

def __init__(self, context, source, testing=False):
    self.logger = logging.getLogger("gc.GhettoClusterSource")
    self.config = config.Config.instance()
    self.context = context
    self.source = source
    self.testing = testing
    self.verbose = self.config.getOption("verbose", "False") == "True"
    self.path = config.path_for(source)
    hostname = config.host_for(source)
    self.states_filename = f"{self.path}/.gc/{hostname}.{context}.json"
    self.states = persistent_dict.PersistentDict(
        self.states_filename,
        self.config.getOption("LAZY_WRITE", 5))

def __init__(self, context, replica, testing=False):
    self.logger = logging.getLogger("gc.GhettoClusterReplica")
    self.config = config.Config.instance()
    self.context = context
    self.replica = replica
    self.testing = testing
    self.verbose = self.config.getOption("verbose", "False") == "True"
    self.source = self.config.get_source_for_context(self.context)
    if not self.source.endswith("/"):
        self.source = self.source + "/"
    hostname = config.host_for(replica)
    self.path = config.path_for(replica)
    self.states_filename = f"{self.path}/.gc/{hostname}.{context}.json"

def start(self):
    while True:
        self.logger.info("Starting")
        self.config.load()
        sources = self.config.get_sources_for_host(self.hostname)
        self.logger.debug(f"sources: {sources}")
        if len(sources.items()) > 0:
            for context, source in sources.items():
                self.logger.info(f"{context}: {source}")
                gcm = GhettoClusterSource(context)
                gcm.scan()
                self.get_status(context, source, False)
            self.logger.info("Sources are complete.")
        else:
            self.logger.info("No sources for this host")
        replicas = self.config.get_replicas_for_host(self.hostname)
        if len(replicas.items()) > 0:
            for context, replica in replicas.items():
                self.logger.info(f"{context}: {replica}")
                source = self.config.get_source_for_context(context)
                gcs = GhettoClusterReplica(context, replica, source)
                puller = Thread(target=gcs.pull)
                self.logger.info("Starting pull thread")
                puller.start()
                timer = elapsed.ElapsedTimer()
                while puller.is_alive():
                    if timer.once_every(15):
                        scanner = Thread(target=gcs.scan)
                        self.logger.debug("Starting scan thread")
                        scanner.start()
                        scanner.join()
                        self.logger.debug("Scan thread finished")
                    else:
                        time.sleep(1)  # spin, but not hard
                gcs.scan()
            self.logger.info("Replicas are complete")
        else:
            self.logger.info("No replicas for this host")
        try:
            signal.signal(signal.SIGHUP, self.wakeup)
            CYCLE = str_to_duration(self.config.getOption("CYCLE", "24h"))
            self.logger.info(f"Sleeping for {duration_to_str(CYCLE)}"
                             f" in PID {os.getpid()}")
            self.logger.debug("send SIGHUP to wake up")
            time.sleep(CYCLE)
        except WakeupException:
            self.logger.warning("Restarting as requested (SIGHUP)")
            signal.signal(signal.SIGHUP, signal.SIG_DFL)

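# Minimal standalone sketch of the pull-then-scan pattern above, using only
# the standard library. elapsed.ElapsedTimer and once_every() are project
# helpers not shown here; time.monotonic() stands in for them.
import threading
import time

def run_with_periodic_scan(pull, scan, interval=15):
    puller = threading.Thread(target=pull)
    puller.start()
    last = time.monotonic()
    while puller.is_alive():
        if time.monotonic() - last >= interval:
            last = time.monotonic()
            scanner = threading.Thread(target=scan)
            scanner.start()
            scanner.join()      # one scan at a time, while the pull runs
        else:
            time.sleep(1)       # spin, but not hard
    scan()                      # one final scan after the pull completes
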
def __init__(self, context, dest, source):
    super().__init__(context)
    self.dest = dest
    if not source.endswith("/"):
        self.source = source + "/"
    else:
        self.source = source
    hostname = config.host_for(dest)
    self.path = config.path_for(dest)
    self.states_filename = (f"{self.path}/.ghetto_cluster/"
                            f"{hostname}.{context}.json")
    self.states = persistent_dict.PersistentDict(
        self.states_filename,
        self.config.getOption("LAZY_WRITE", 5))
    self.testing = False
    self.verbose = self.config.getOption("verbose", "False") == "True"
    self.logger = logging.getLogger(logger_str(__class__))

def __init__(self, context):
    super().__init__()
    self.context = context
    self.config = config.Config.instance()
    self.logger = logging.getLogger(logger_str(__class__) + " " + context)
    self.logger.info(f"Creating clientlet {self.context}")
    self.path = config.path_for(self.config.get(self.context, "backup"))
    assert os.path.exists(self.path), f"{self.path} does not exist!"
    # ALL source contexts (we care a lot)
    self.sources = {}
    self.scanners = {}
    self.random_source_list = []
    self.build_sources()
    self.drops = 0  # count the number of times I drop a file
    self.update_allocation()
    self.bailing = False
    self.sockets = {}

def __init__(self, context):
    super().__init__()
    self.context = context
    self.config = config.Config.instance()
    self.logger = logging.getLogger(logger_str(__class__) + " " + context)
    self.logger.info(f"Creating clientlet {self.context}")
    self.path = config.path_for(self.config.get(self.context, "backup"))
    assert os.path.exists(self.path), f"{self.path} does not exist!"
    # creates per-source scanners, random_source_list, claims
    self.build_sources()
    self.stats = stats.Stats()
    self.bailing = False
    self.datagrams = {}
    self.current_state = "startup"
    self.state_timer = elapsed.ElapsedTimer()
    self.states = {'startup': 0}
    self.efficiency = {}

def __init__(self, context):
    super().__init__()
    self.context = context
    self.logger = logging.getLogger(utils.logger_str(__class__)
                                    + " " + context)
    self.config = config.Config.instance()
    self.copies = int(self.config.get(self.context, "copies", 2))
    self.path = config.path_for(self.config.get(self.context, "source"))
    self.scanner = scanner.Scanner(self.context, self.path)
    # TODO: rename this "clients"
    self.files = dict()  # NOT persistent! On startup assume nothing
    self.drains = elapsed.ExpiringDict(300)  # NOT persistent!
    self.locks = locker.Locker(5)
    # TODO: timers should relate to a configurable cycle time
    self.bailout = False
    self.stats = {'claims': 0, 'drops': 0}
    self.handling = False

def __init__(self, context):
    super().__init__()
    self.context = context
    logger_str = f"{utils.logger_str(__class__)} {context}"
    self.logger = logging.getLogger(logger_str)
    self.config = config.Config.instance()
    self.copies = int(self.config.get(self.context, "copies", 2))
    self.path = config.path_for(self.config.get(self.context, "source"))
    self.scanner = scanner.ScannerLite(self.context, self.path)
    self.rescan = utils.get_interval(self.config, "rescan", self.context)
    lazy_write = self.config.get(context, "LAZY WRITE", 5)
    lazy_write = utils.str_to_duration(lazy_write)
    # self.clients: { filename : { client: expiry_time, } }
    clients_state = f"/tmp/cb.{context}-clients.json.bz2"
    self.clients = PersistentDict(clients_state, lazy_write=lazy_write)
    self.stats = stats.Stats()
    self.handling = False

def rsync_from_list(self, source_context, filename):
    self.logger.debug(f"rsync {source_context}: {filename}")
    verbose = self.config.get("global", "verbose", False)
    dryrun = self.config.get("global", "dryrun", False)
    timer = elapsed.ElapsedTimer()
    (n, size) = self.sizeof(source_context)
    source = self.sources[source_context]
    src_host = config.host_for(source)
    hostname = config.host_for(self.config.get(self.context, "backup"))
    if src_host == hostname:
        # a local copy, just use path
        source = config.path_for(source)
    dest = f"{self.paths[source_context]}/"
    filesfrom = f"--files-from={filename}"
    prefix = f"{self.context}:{source_context}"
    rsync_exit = rsync(source, dest, (filesfrom, "-v", "--progress"),
                       prefix=prefix)
    bps = size / max(timer.elapsed(), 1e-3)  # guard against zero elapsed time
    self.logger.debug(
        f"rsync returned {rsync_exit}: {bytes_to_str(bps)}B/s effective")
    return rsync_exit
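
# Sketch of what a --files-from transfer like the one above boils down to;
# the project's rsync() wrapper isn't shown here, so this stands in with a
# plain subprocess call. Assumes the rsync binary is on PATH; paths and
# arguments below are illustrative only.
import subprocess

def rsync_sketch(source, dest, extra_args=()):
    cmd = ["rsync", "-a", *extra_args, source, dest]
    return subprocess.call(cmd)   # 0 on success, like file_state.rsync()

# e.g. rsync_sketch("host:/data/", "/backup/ctx/", ("--files-from=/tmp/list",))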