Example 1
    def retrieve(self, source_context, filename):
        self.logger.debug(f"retrieving {source_context}:{filename}" + \
                            f" to {self.path}/{source_context}/{filename}")
        # 0: do I have it?
        if self.scanners[source_context].contains_p(filename):
            claimed = self.claim(source_context, filename, dropping=True)
            self.logger.debug(
                f"I already have {filename}; claimed = {claimed}")
            if claimed in ("ack", "keep"):
                return claimed
            else:
                self.logger.debug(f"Something's wrong, trying again")

        # 1: build the filenames (full path) for source + dest
        source = self.config.get(source_context, "source") + "/" + filename
        src_host = config.host_for(source)
        hostname = config.host_for(self.config.get(self.context, "backup"))
        if src_host == hostname:  # a local copy, just use path
            source = config.path_for(source)
        dest = f"{self.path}/{source_context}/{filename}"

        # 2: make the transfer
        self.logger.debug(f"rsync {source} {dest}")
        self.makedirs(dest)
        rsync_stat = file_state.rsync(source, dest)
        self.logger.debug(f"rsync returned {rsync_stat}")

        if rsync_stat == 0:
            # 3: record it
            self.claim(source_context, filename, dropping=True)
        else:
            self.logger.error("Failed to rsync???")
            raise FileNotFoundError(dest)
Example 2
    def retrieve(self, source_context, filename, counter=0):
        self.logger.debug(
            f"retrieving {source_context}:{filename} to {self.path}/{source_context}/{filename}"
        )
        # 0: do I have it?
        if self.scanners[source_context].contains_p(filename):
            self.logger.debug(f"I already have {filename}")
            # just send the one; inform() will handle the rest
            self.claim(source_context, filename, dropping=True)
            return

        # 1: build the filenames (full path) for source + dest
        source = self.config.get(source_context, "source") + "/" + filename
        src_host = config.host_for(source)
        hostname = config.host_for(self.config.get(self.context, "backup"))
        if src_host == hostname:  # a local copy, just use path
            source = config.path_for(source)
        dest_path = f"{self.path}/{source_context}"
        dest = f"{dest_path}/{filename}"

        # 2: make the transfer
        self.logger.debug(f"rsync {source} {dest}")
        self.makedirs(dest)
        rsync_stat = file_state.rsync(source, dest)
        self.logger.debug(f"rsync returned {rsync_stat}")

        if rsync_stat == 0:
            # 3: record it
            self.claim(source_context, filename, dropping=True)  # no retry
        else:
            self.logger.error("Failed to rsync???")
            raise FileNotFoundError(dest)
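
The unused counter parameter here hints at a bounded-retry design. A minimal
standalone sketch of that idea (retrieve_with_retry and fetch are hypothetical
names, not project code):

def retrieve_with_retry(fetch, filename, attempts=3):
    # fetch() returns True on success; give up loudly after a few tries
    for _ in range(attempts):
        if fetch(filename):
            return True
    raise FileNotFoundError(filename)
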
Example 3
    def build_sources(self):
        source_contexts = self.config.get_contexts_for_key("source")
        self.prune_sources(source_contexts)

        for source_context, source in source_contexts.items():
            self.sources[source_context] = config.host_for(source)
            path = f"{self.path}/{source_context}"
            self.scanners[source_context] = \
                scanner.Scanner(source_context, path,
                                name=f"{self.context}:{source_context}")
            self.random_source_list.append(source_context)
        random.shuffle(self.random_source_list)
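
These examples lean on config.host_for() and config.path_for() to split a
"host:/path" source spec. Their implementations are not shown; a plausible
minimal sketch, assuming specs take that form:

def host_for(spec):
    # "host:/path" -> "host"; a bare local path has no host part
    return spec.split(":", 1)[0] if ":" in spec else ""

def path_for(spec):
    # "host:/path" -> "/path"; a bare local path is returned unchanged
    return spec.split(":", 1)[1] if ":" in spec else spec
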
Example 4
    def get_datagram(self, source_context):
        ADDRESS = config.host_for(self.sources[source_context])
        PORT = int(self.config.get("global", "PORT", "5005"))

        if source_context not in self.datagrams:
            # self.logger.debug(f"building a datagram for {source_context}")
            name = f"Datagram {self.context}"
            self.datagrams[source_context] = Datagram(
                "Bogus", server=ADDRESS, port=PORT, name=name, compress=True)
            self.datagrams[source_context].ping()
        return self.datagrams[source_context]
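
get_datagram() builds one Datagram per source context on first use and caches
it. The same build-on-first-use pattern, reduced to a standalone sketch
(LazyCache is a hypothetical name):

class LazyCache:
    def __init__(self, factory):
        self._factory = factory   # called once per new key
        self._cache = {}

    def get(self, key):
        # construct lazily, then reuse the cached object
        if key not in self._cache:
            self._cache[key] = self._factory(key)
        return self._cache[key]
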
Example 5
    def __init__(self, context, source, testing=False):
        self.logger = logging.getLogger("gc.GhettoClusterSource")
        self.config = config.Config.instance()
        self.context = context
        self.source = source
        self.testing = testing
        self.verbose = self.config.getOption("verbose", "False") == "True"
        self.path = config.path_for(source)
        hostname = config.host_for(source)
        self.states_filename = f"{self.path}/.gc/{hostname}.{context}.json"
        self.states = persistent_dict.PersistentDict(
            self.states_filename, self.config.getOption("LAZY_WRITE", 5))
Example 6
    def rsync_from_list(self, source_context, filename):
        self.logger.debug(f"rsync {source_context}: {filename}")
        verbose = self.config.get("global", "verbose", False)
        dryrun = self.config.get("global", "dryrun", False)
        timer = elapsed.ElapsedTimer()
        (n, size) = self.sizeof(source_context)
        source = self.sources[source_context]
        src_host = config.host_for(source)
        hostname = config.host_for(self.config.get(self.context, "backup"))
        if src_host == hostname:  # a local copy, just use path
            source = config.path_for(source)
        dest = f"{self.paths[source_context]}/"
        filesfrom = f"--files-from={filename}"
        # self.logger.debug(f"rsync --delete {source} {dest} --files-from={filename}")
        prefix = f"{self.context}:{source_context}"
        rsync_exit = rsync(source, dest, (filesfrom, "-v", "--progress"),
                           prefix=prefix)
        bps = size / timer.elapsed()
        self.logger.debug(
            f"rsync returned {rsync_exit}: {bytes_to_str(bps)}B/s effective")
        return rsync_exit
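
The rsync() wrapper called above is project code; only its call shape is
visible here. A minimal subprocess-based stand-in shows what --files-from
does: rsync reads the list of relative paths from the named file instead of
walking the whole source tree (hypothetical wrapper, simplified signature):

import subprocess

def rsync(source, dest, options=()):
    # returns rsync's exit code, 0 on success
    cmd = ["rsync", "-a", *options, source, dest]
    return subprocess.call(cmd)

# e.g.: rsync("nas:/vol/photos/", "/backup/photos/",
#             ("--files-from=batch.txt", "-v", "--progress"))
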
Example 7
    def __init__(self, context, replica, testing=False):
        self.logger = logging.getLogger("gc.GhettoClusterReplica")
        self.config = config.Config.instance()
        self.context = context
        self.replica = replica
        self.testing = testing
        self.verbose = self.config.getOption("verbose", "False") == "True"
        self.source = self.config.get_source_for_context(self.context)
        if not self.source.endswith("/"):
            self.source = self.source + "/"
        hostname = config.host_for(replica)
        self.path = config.path_for(replica)
        self.states_filename = f"{self.path}/.gc/{hostname}.{context}.json"
Example 8
    def __init__(self, context, dest, source):
        super().__init__(context)
        self.dest = dest
        if not source.endswith("/"):
            self.source = source + "/"
        else:
            self.source = source
        hostname = config.host_for(dest)
        self.path = config.path_for(dest)
        self.states_filename = f"{self.path}/.ghetto_cluster/" \
                               f"{hostname}.{context}.json"
        self.states = persistent_dict.PersistentDict(
            self.states_filename, self.config.getOption("LAZY_WRITE", 5))
        self.testing = False
        self.verbose = self.config.getOption("verbose", "False") == "True"
        self.logger = logging.getLogger(logger_str(__class__))
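
Both constructors force the source to end with "/". That matters to rsync: a
trailing slash copies the directory's contents into the destination, while a
bare "src" would create an extra "src/" level inside it. As a one-liner
(hypothetical helper):

def normalize_source(source):
    # trailing "/" => rsync copies contents, not the directory itself
    return source if source.endswith("/") else source + "/"
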
Example 9
    def cleanup_gc_dir(self):
        self.logger.warning("Cleanup time")
        hostname = config.host_for(self.source)
        valid_files = [f"{hostname}.{self.context}.json"]
        replicas = self.config.get_replicas_for_context(self.context)
        if len(replicas) > 0:
            for replica in replicas:
                statefile = f"{config.host_for(replica)}.{self.context}.json"
                valid_files.append(statefile)
        else:
            self.logger.debug(f"no replicas for {self.context}")
        self.logger.debug(f"valid files: {valid_files}")
        json_files = [f for f in os.listdir(self.path)
                      if os.path.isfile(os.path.join(self.path, f))
                      and f.endswith(".json")]
        for json_file in json_files:
            if json_file not in valid_files:
                self.logger.info(f"extraneous file: {json_file}")
Example 10
    def __str__(self):
        hostname = config.host_for(self.config.get(self.context, "backup"))
        return (f"{hostname}: {utils.bytes_to_str(self.consumption())}"
                f"/{utils.bytes_to_str(self.allocation)}")
Example 11
    def __str__(self):
        hostname = config.host_for(self.config.get(self.context, "backup"))
        consumption = bytes_to_str(self.consumption())
        allocation = bytes_to_str(self.allocation)
        return f"{hostname}: {consumption}/{allocation}"
Example 12
def state_filename(context, path_src, hostname_src):
    path = f"{config.path_for(path_src)}/.gc"
    hostname = config.host_for(hostname_src)
    return f"{path}/{hostname}.{context}.json"
    def inspect_replica(self, source, source_states, context, replica):
        prefix = f"{config.path_for(source)}/.ghetto_cluster/"
        hostname = config.host_for(replica)
        replica_file = f"{prefix}{hostname}.{context}.json"
        replica_states = persistent_dict.PersistentDict(replica_file)
        if self.config.getOption("verbose", "False") == "True":
            msg = f"{replica} ::\n"
            missing = mismatch = extra = 0
            lines = 0
            for fqde, source_state in source_states.items():
                if replica_states.contains_p(fqde):
                    replica_states.touch(fqde)
                    replica_state = replica_states.get(fqde)
                    if source_state["size"] != replica_state["size"]:
                        mismatch += 1
                        if lines < 10:
                            msg += f"\tmismatch: {fqde} "
                            if replica_state["ctime"] > source_state["ctime"]:
                                msg += "replica is newer"
                            else:
                                # TODO: tell how stale it is
                                staleness = source_state["ctime"] - replica_state["ctime"]
                                msg += f"{duration_to_str(staleness)} stale"
                            msg += "\n"
                            lines += 1
                else:
                    missing += 1
                    if lines < 10:
                        msg += f"\tmissing: {fqde}\n"
                        lines += 1
                if lines == 10:
                    msg += "\t...\n"
                    lines = 11
            extra = len(replica_states.clean_keys())
            if missing + mismatch + extra != 0:
                pct = (100 * len(replica_states.items())
                       / len(source_states.items()))
                if pct > 100:
                    pct = 100
                if int(pct) == 100:
                    pct = 99  # an incomplete replica never reports 100%
                msg += (f"\tmissing: {missing} ({int(pct)}% complete); "
                        f"mismatched: {mismatch}; extra: {extra}")
            else:
                # TODO: staleness report
                # msg = self.check_replica_staleness(source, source_states, context, replica) + msg[:-4]
                msg = "Complete: " + msg[:-4]
        else:
            (target_files, target_bytes) = self.sizeof(source_states)
            (nlines, nbytes) = self.sizeof(replica_states)
            pct_complete = int(100 * nlines / target_files)
            if nlines == target_files:
                # msg = self.check_replica_staleness(source, source_states, replica_file) + f": {replica}"
                if self.replica_is_current(source, source_states, replica_file):
                    msg = f"  Complete: {replica}"
                else:
                    msg = f"  Stale: {replica}"
            elif nlines == 0:
                msg = f"  Not started: {replica}"
            elif nlines < target_files:
                if self.replica_is_current(source, source_states, replica_file):
                    msg = "  Active: "
                else:
                    msg = "  Stale: "
                msg += f"{pct_complete:3d}% {nlines}/{target_files}: {replica}"
            else:
                msg = ("WARNING: too many files in replica "
                       f"{config.host_for(replica)}\n"
                       f"\t{nlines}/{target_files}: {replica}")
        return msg
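
Stripped of its reporting, the verbose branch of inspect_replica() is a
three-way tally. A standalone sketch over plain dicts (the real code walks a
PersistentDict):

def diff_states(source_states, replica_states):
    # missing: in source only; mismatch: wrong size; extra: in replica only
    missing = mismatch = 0
    for fqde, src in source_states.items():
        rep = replica_states.get(fqde)
        if rep is None:
            missing += 1
        elif src["size"] != rep["size"]:
            mismatch += 1
    extra = len(set(replica_states) - set(source_states))
    return missing, mismatch, extra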