def retrieve(self, source_context, filename):
    self.logger.debug(f"retrieving {source_context}:{filename}"
                      f" to {self.path}/{source_context}/{filename}")
    # 0: do I have it?
    if self.scanners[source_context].contains_p(filename):
        claimed = self.claim(source_context, filename, dropping=True)
        self.logger.debug(f"I already have {filename}; claimed = {claimed}")
        if claimed in ("ack", "keep"):
            return claimed
        else:
            self.logger.debug("Something's wrong, trying again")
    # 1: build the filenames (full path) for source + dest
    source = self.config.get(source_context, "source") + "/" + filename
    src_host = config.host_for(source)
    hostname = config.host_for(self.config.get(self.context, "backup"))
    if src_host == hostname:
        # a local copy, just use path
        source = config.path_for(source)
    dest = f"{self.path}/{source_context}/{filename}"
    # 2: make the transfer
    self.logger.debug(f"rsync {source} {dest}")
    self.makedirs(dest)
    rsync_stat = file_state.rsync(source, dest)
    self.logger.debug(f"rsync returned {rsync_stat}")
    if rsync_stat == 0:
        # 3: record it
        self.claim(source_context, filename, dropping=True)
    else:
        self.logger.error("Failed to rsync???")
        raise FileNotFoundError(f"rsync failed: {source} -> {dest}")

def retrieve(self, source_context, filename, counter=0):
    # counter is reserved for retry bookkeeping; unused (see "no retry" below)
    self.logger.debug(f"retrieving {source_context}:{filename}"
                      f" to {self.path}/{source_context}/{filename}")
    # 0: do I have it?
    if self.scanners[source_context].contains_p(filename):
        self.logger.debug(f"I already have {filename}")
        # just send the one; inform() will handle the rest
        self.claim(source_context, filename, dropping=True)
        return
    # 1: build the filenames (full path) for source + dest
    source = self.config.get(source_context, "source") + "/" + filename
    src_host = config.host_for(source)
    hostname = config.host_for(self.config.get(self.context, "backup"))
    if src_host == hostname:
        # a local copy, just use path
        source = config.path_for(source)
    dest_path = f"{self.path}/{source_context}"
    dest = f"{dest_path}/{filename}"
    # 2: make the transfer
    self.logger.debug(f"rsync {source} {dest}")
    self.makedirs(dest)
    rsync_stat = file_state.rsync(source, dest)
    self.logger.debug(f"rsync returned {rsync_stat}")
    if rsync_stat == 0:
        # 3: record it
        self.claim(source_context, filename, dropping=True)
        # no retry
    else:
        self.logger.error("Failed to rsync???")
        raise FileNotFoundError(f"rsync failed: {source} -> {dest}")

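# Both retrieve() variants above (and rsync_from_list() further down) share
# one idiom: when the file's source host is the local backup host, strip the
# host prefix so rsync does a local copy instead of an ssh transfer. A minimal
# self-contained restatement of that idiom, with the host:path parsing mocked
# (the real parsing lives in the config module; names here are illustrative):
def _localize(source_spec, local_host):
    host, sep, path = source_spec.partition(":")
    return path if sep and host == local_host else source_spec

# _localize("host1:/srv/photos", "host1")  -> "/srv/photos"        (local copy)
# _localize("host1:/srv/photos", "host2")  -> "host1:/srv/photos"  (via ssh)
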
def build_sources(self):
    source_contexts = self.config.get_contexts_for_key("source")
    self.prune_sources(source_contexts)
    for source_context, source in source_contexts.items():
        self.sources[source_context] = config.host_for(source)
        path = f"{self.path}/{source_context}"
        self.scanners[source_context] = \
            scanner.Scanner(source_context, path,
                            name=f"{self.context}:{source_context}")
        self.random_source_list.append(source_context)
    # shuffle once, after all contexts are registered
    random.shuffle(self.random_source_list)

def get_datagram(self, source_context):
    ADDRESS = config.host_for(self.sources[source_context])
    PORT = int(self.config.get("global", "PORT", "5005"))
    if source_context not in self.datagrams:
        # self.logger.debug(f"building a datagram for {source_context}")
        name = f"Datagram {self.context}"
        self.datagrams[source_context] = \
            Datagram("Bogus", server=ADDRESS, port=PORT,
                     name=name, compress=True)
    self.datagrams[source_context].ping()
    return self.datagrams[source_context]

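# get_datagram() above is a memoized factory: one Datagram per source context,
# built lazily and pinged on every access. A minimal, self-contained sketch of
# the same caching shape (Datagram itself is stubbed out; this illustrates the
# pattern, not the real class):
class _ChannelCache:
    def __init__(self, factory):
        self._factory = factory   # e.g. lambda key: Datagram(...)
        self._channels = {}

    def get(self, key):
        if key not in self._channels:      # first request: build and cache
            self._channels[key] = self._factory(key)
        return self._channels[key]         # later requests: reuse
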
def __init__(self, context, source, testing=False):
    self.logger = logging.getLogger("gc.GhettoClusterSource")
    self.config = config.Config.instance()
    self.context = context
    self.source = source
    self.testing = testing
    self.verbose = self.config.getOption("verbose", "False") == "True"
    self.path = config.path_for(source)
    hostname = config.host_for(source)
    self.states_filename = f"{self.path}/.gc/{hostname}.{context}.json"
    self.states = persistent_dict.PersistentDict(
        self.states_filename, self.config.getOption("LAZY_WRITE", 5))

def rsync_from_list(self, source_context, filename):
    self.logger.debug(f"rsync {source_context}: {filename}")
    timer = elapsed.ElapsedTimer()
    (n, size) = self.sizeof(source_context)
    source = self.sources[source_context]
    src_host = config.host_for(source)
    hostname = config.host_for(self.config.get(self.context, "backup"))
    if src_host == hostname:
        # a local copy, just use path
        source = config.path_for(source)
    dest = f"{self.paths[source_context]}/"
    filesfrom = f"--files-from={filename}"
    # self.logger.debug(f"rsync --delete {source} {dest} --files-from={filename}")
    prefix = f"{self.context}:{source_context}"
    rsync_exit = rsync(source, dest, (filesfrom, "-v", "--progress"),
                       prefix=prefix)
    bps = size / timer.elapsed()
    self.logger.debug(
        f"rsync returned {rsync_exit}: {bytes_to_str(bps)}B/s effective")
    return rsync_exit

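# For reference, a minimal sketch of the raw invocation a wrapper like rsync()
# above presumably builds; the real wrapper's exact flags aren't shown here,
# so treat this as an assumption rather than its actual behavior.
# --files-from limits the transfer to the paths listed in the given file,
# interpreted relative to the source directory.
import subprocess

def _rsync_sketch(source, dest, files_from):
    cmd = ["rsync", "-a", "-v", "--progress",
           f"--files-from={files_from}", source, dest]
    return subprocess.run(cmd).returncode   # 0 on success, like rsync_exit
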
def __init__(self, context, replica, testing=False):
    self.logger = logging.getLogger("gc.GhettoClusterReplica")
    self.config = config.Config.instance()
    self.context = context
    self.replica = replica
    self.testing = testing
    self.verbose = self.config.getOption("verbose", "False") == "True"
    self.source = self.config.get_source_for_context(self.context)
    if not self.source.endswith("/"):
        self.source = self.source + "/"
    hostname = config.host_for(replica)
    self.path = config.path_for(replica)
    self.states_filename = f"{self.path}/.gc/{hostname}.{context}.json"

def __init__(self, context, dest, source):
    super().__init__(context)
    self.dest = dest
    if not source.endswith("/"):
        self.source = source + "/"
    else:
        self.source = source
    hostname = config.host_for(dest)
    self.path = config.path_for(dest)
    self.states_filename = (f"{self.path}/.ghetto_cluster/"
                            f"{hostname}.{context}.json")
    self.states = persistent_dict.PersistentDict(
        self.states_filename, self.config.getOption("LAZY_WRITE", 5))
    self.testing = False
    self.verbose = self.config.getOption("verbose", "False") == "True"
    self.logger = logging.getLogger(logger_str(__class__))

def cleanup_gc_dir(self):
    self.logger.warning("Cleanup time")
    hostname = config.host_for(self.source)
    valid_files = [f"{hostname}.{self.context}.json"]
    for replica in self.config.get_replicas_for_context(self.context):
        statefile = f"{config.host_for(replica)}.{self.context}.json"
        valid_files.append(statefile)
    self.logger.debug(f"valid files: {valid_files}")
    json_files = [f for f in os.listdir(self.path)
                  if os.path.isfile(os.path.join(self.path, f))
                  and f.endswith(".json")]
    for json_file in json_files:
        if json_file not in valid_files:
            self.logger.info(f"extraneous file: {json_file}")

def __str__(self):
    hostname = config.host_for(self.config.get(self.context, "backup"))
    consumption = bytes_to_str(self.consumption())
    allocation = bytes_to_str(self.allocation)
    return f"{hostname}: {consumption}/{allocation}"

def state_filename(context, path_src, hostname_src):
    path = f"{config.path_for(path_src)}/.gc"
    hostname = config.host_for(hostname_src)
    return f"{path}/{hostname}.{context}.json"

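# A self-contained illustration of the naming scheme above, with the config
# lookups mocked (the real ones live in the config module; the host and path
# values are hypothetical):
def _example_state_filename():
    def path_for(spec):   # "host1:/srv/photos" -> "/srv/photos"
        return spec.partition(":")[2]
    def host_for(spec):   # "host1:/srv/photos" -> "host1"
        return spec.partition(":")[0]
    context, spec = "photos", "host1:/srv/photos"
    return f"{path_for(spec)}/.gc/{host_for(spec)}.{context}.json"
    # -> "/srv/photos/.gc/host1.photos.json"
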
def inspect_replica(self, source, source_states, context, replica):
    prefix = f"{config.path_for(source)}/.ghetto_cluster/"
    hostname = config.host_for(replica)
    replica_file = f"{prefix}/{hostname}.{context}.json"
    replica_states = persistent_dict.PersistentDict(replica_file)
    if self.config.getOption("verbose", "False") == "True":
        msg = f"{replica} ::\n"
        missing = mismatch = extra = 0
        lines = 0
        for fqde, source_state in source_states.items():
            if replica_states.contains_p(fqde):
                replica_states.touch(fqde)
                replica_state = replica_states.get(fqde)
                if source_state["size"] != replica_state["size"]:
                    mismatch += 1
                    if lines < 10:
                        msg += f"\tmismatch: {fqde} "
                        if replica_state["ctime"] > source_state["ctime"]:
                            msg += "replica is newer"
                        else:
                            # TODO: tell how stale it is
                            msg += f"{duration_to_str(source_state['ctime'] - replica_state['ctime'])} stale"
                        msg += "\n"
                        lines += 1
            else:
                missing += 1
                if lines < 10:
                    msg += f"\tmissing: {fqde}\n"
                    lines += 1
            if lines == 10:
                msg += "\t...\n"
                lines = 11  # suppress further detail lines
        extra = len(replica_states.clean_keys())
        if missing + mismatch + extra != 0:
            pct = 100 * len(replica_states.items()) / len(source_states.items())
            if pct > 100:
                pct = 100
            if int(pct) == 100:
                pct = 99  # never show 100% while work remains
            msg += (f"\tmissing: {missing} ({pct:.0f}% complete); "
                    f"mismatched: {mismatch}; "
                    f"extra: {extra}")
        else:
            # TODO: staleness report
            # msg = self.check_replica_staleness(source, source_states, context, replica) + msg[:-4]
            msg = "Complete: " + msg[:-4]  # strip trailing " ::\n"
    else:
        (target_files, target_bytes) = self.sizeof(source_states)
        (nlines, nbytes) = self.sizeof(replica_states)
        pct_complete = int(100 * nlines / target_files)
        if nlines == target_files:
            # msg = self.check_replica_staleness(source, source_states, replica_file) + f": {replica}"
            if self.replica_is_current(source, source_states, replica_file):
                msg = f" Complete : {replica}"
            else:
                msg = f" Stale: {replica}"
        elif nlines == 0:
            msg = f" Not started: {replica}"
        elif nlines < target_files:
            if self.replica_is_current(source, source_states, replica_file):
                msg = " Active: "
            else:
                msg = " Stale: "
            msg += f"{pct_complete:3d}% {nlines}/{target_files}: {replica}"
        else:
            msg = (f"WARNING: too many files in replica "
                   f"{config.host_for(replica)}\n"
                   f"\t{nlines}/{target_files}: {replica}")
    return msg

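# The completion-percentage clamp in inspect_replica() deliberately never
# reports 100% while anything is missing, mismatched, or extra. A
# self-contained restatement of just that rule (names are illustrative):
def _clamped_pct(replica_count, source_count):
    pct = 100 * replica_count / source_count
    if pct > 100:         # replica somehow has more entries than the source
        pct = 100
    if int(pct) == 100:   # would display as 100%, but work remains: show 99
        pct = 99
    return pct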