    def pull(self):
        self.logger.debug(f"Pulling {self.dest} from {self.source}")
        args = []
        # TODO: parse out multiple options
        options = self.config.getConfig(self.context, "rsync options")
        if options:  # may be None if no "rsync options" are configured
            args += [f"--{option}" for option in options]
        ignorals = self.build_ignorals()
        if len(ignorals) > 0:
            args += [f"--exclude={item}" for item in ignorals]

        if self.testing:
            # test mode: strip out hostnames for the rsync
            source = config.path_for(self.source)
        else:
            source = self.source

        dest = config.path_for(self.dest)
        self.logger.info("Starting rsync ...")
        file_state.rsync(source, dest, args)
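
file_state.rsync is called throughout these examples but never defined in them. A minimal sketch of such a wrapper, assuming it shells out to the rsync binary and returns the exit status the callers check against 0 (the prefix argument, seen in Example #16, is presumably for labeling log output and is ignored here):

import subprocess

def rsync(source, dest, options=(), prefix=None):
    # Hypothetical sketch: archive-mode rsync with caller-supplied extra
    # options; returns rsync's exit code (0 on success).
    argv = ["rsync", "-a"] + list(options) + [source, dest]
    return subprocess.run(argv).returncode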
Example #2
    def __init__(self, context):
        super().__init__()
        self.context = context

        self.logger = logging.getLogger(utils.logger_str(__class__) \
                        + " " + context)
        # self.logger.setLevel(logging.INFO)
        self.config = config.Config.instance()
        self.copies = int(self.config.get(self.context, "copies", 2))
        self.path = config.path_for(self.config.get(self.context, "source"))
        self.scanner = scanner.Scanner(self.context, self.path)

        lazy_write = utils.str_to_duration(
            self.config.get(context, "LAZY WRITE", 5))
        # TODO: support expiration
        self.rescan = utils.str_to_duration(
            self.config.get(self.context, "rescan"))
        self.clients = persistent_dict.PersistentDict(
            f"/tmp/cb.s{context}.json.bz2",
            lazy_write=lazy_write,
            cls=lock.Lock,
            expiry=self.rescan)
        self.drains = elapsed.ExpiringDict(300)  # NOT persistent!
        self.locks = locker.Locker(5)
        # TODO: timers should relate to a configurable cycle time
        self.bailout = False
        self.stats = {'claims': 0, 'drops': 0}
        self.handling = False
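
persistent_dict.PersistentDict is used above with lazy_write, cls, and expiry arguments but is not shown. As an illustration of the idea only (not the project's actual class), a dict that persists itself to a JSON file could look like:

import json

class JsonBackedDict(dict):
    # Illustrative sketch: loads itself from a JSON file and rewrites the
    # file on every assignment. The real PersistentDict also batches
    # writes (lazy_write), compresses (.bz2), and expires entries.
    def __init__(self, path):
        super().__init__()
        self.path = path
        try:
            with open(self.path) as f:
                self.update(json.load(f))
        except FileNotFoundError:
            pass

    def __setitem__(self, key, value):
        super().__setitem__(key, value)
        with open(self.path, "w") as f:
            json.dump(dict(self), f)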
Example #3
    def retrieve(self, source_context, filename, counter=0):
        self.logger.debug(
            f"retrieving {source_context}:{filename} to {self.path}/{source_context}/{filename}"
        )
        # 0: do I have it?
        if self.scanners[source_context].contains_p(filename):
            self.logger.debug(f"I already have {filename}")
            # just send the one; inform() will handle the rest
            self.claim(source_context, filename, dropping=True)
            return

        # 1: build the filenames (full path) for source + dest
        source = self.config.get(source_context, "source") + "/" + filename
        src_host = config.host_for(source)
        hostname = config.host_for(self.config.get(self.context, "backup"))
        if src_host == hostname:  # a local copy, just use path
            source = config.path_for(source)
        dest_path = f"{self.path}/{source_context}"
        dest = f"{dest_path}/{filename}"

        # 2: make the transfer
        self.logger.debug(f"rsync {source} {dest}")
        self.makedirs(dest)
        rsync_stat = file_state.rsync(source, dest)
        self.logger.debug(f"rsync returned {rsync_stat}")

        if rsync_stat == 0:
            # 3: record it
            self.claim(source_context, filename, dropping=True)  # no retry
        else:
            self.logger.error(f"rsync failed ({rsync_stat}): {source} -> {dest}")
            raise FileNotFoundError(filename)
Example #4
    def __init__(self, context):
        super().__init__()
        self.context = context
        self.config = config.Config.instance()
        self.logger = logging.getLogger(logger_str(__class__) + " " + context)
        self.logger.info(f"Creating clientlet {self.context}")

        self.path = config.path_for(self.config.get(self.context, "backup"))
        assert os.path.exists(self.path), f"{self.path} does not exist!"

        # ALL source contexts (we care a lot)
        self.sources = {}
        self.scanners = {}
        self.random_source_list = []
        self.build_sources()

        lazy_write = utils.str_to_duration(
            self.config.get(context, "LAZY WRITE", 5))
        # TODO: my cache of claims should expire in rescan/2
        self.rescan = self.get_interval("rescan") // 2
        self.claims = PersistentDict(f"/tmp/cb.c{context}.json.bz2",
                                     lazy_write=lazy_write,
                                     expiry=self.rescan)
        self.drops = 0  # count the number of times I drop a file
        self.stats = stats.Stats()

        self.update_allocation()
        self.bailing = False
        self.datagrams = {}
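
self.get_interval("rescan") above and utils.get_interval(self.config, "rescan", self.context) in Example #15 suggest a small helper. A plausible sketch, assuming it reads a duration string from the config and converts it to seconds:

def get_interval(config, key, context):
    # Hypothetical sketch: "24h" -> seconds, via the same str_to_duration
    # helper used for LAZY WRITE above.
    return utils.str_to_duration(config.get(context, key))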
Example #5
    def retrieve(self, source_context, filename):
        self.logger.debug(f"retrieving {source_context}:{filename}" + \
                            f" to {self.path}/{source_context}/{filename}")
        # 0: do I have it?
        if self.scanners[source_context].contains_p(filename):
            claimed = self.claim(source_context, filename, dropping=True)
            self.logger.debug(
                f"I already have {filename}; claimed = {claimed}")
            if claimed in ("ack", "keep"):
                return claimed
            else:
                self.logger.debug(f"Something's wrong, trying again")

        # 1: build the filenames (full path) for source + dest
        source = self.config.get(source_context, "source") + "/" + filename
        src_host = config.host_for(source)
        hostname = config.host_for(self.config.get(self.context, "backup"))
        if src_host == hostname:  # a local copy, just use path
            source = config.path_for(source)
        dest = f"{self.path}/{source_context}/{filename}"

        # 2: make the transfer
        self.logger.debug(f"rsync {source} {dest}")
        self.makedirs(dest)
        rsync_stat = file_state.rsync(source, dest)
        self.logger.debug(f"rsync returned {rsync_stat}")

        if rsync_stat == 0:
            # 3: record it
            self.claim(source_context, filename, dropping=True)
        else:
            self.logger.error(f"rsync failed ({rsync_stat}): {source} -> {dest}")
            raise FileNotFoundError(filename)
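
Both retrieve variants call self.makedirs(dest) before the transfer; its body is not shown. A plausible method sketch, assuming it only needs to ensure the destination's parent directory exists:

import os

def makedirs(self, dest):
    # Hypothetical sketch: create dest's parent directory so rsync has
    # somewhere to write; exist_ok makes repeat calls harmless.
    os.makedirs(os.path.dirname(dest), exist_ok=True)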
Example #6
    def __init__(self, context):
        super().__init__(context)
        self.source = self.config.get_source_for_context(context)
        self.path = config.path_for(self.source)
        persistent_dict_file = f"{self.path}/.ghetto_cluster/" \
                               f"source.{context}.json"
        self.states = persistent_dict.PersistentDict(persistent_dict_file,
                            self.config.getOption("LAZY_WRITE", 5))
        self.logger = logging.getLogger(logger_str(__class__))
Example #7
    def test_config(self):
        cfg = config.Config.instance()
        cfg.init("test-config.txt", "source", "backup", hostname="localhost")
        contexts = list(cfg.get_contexts_for_key("source").keys())
        print(contexts)
        context = contexts[1]
        path = config.path_for(cfg.get(context, "source"))
        print(path)
        s = scanner.Scanner(context, path)
        s.scan()
        filename = list(s.keys())[0]
        self.assertTrue(os.path.exists(f"{path}/{filename}"))
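
test_config uses self.assertTrue, so it presumably lives in a unittest.TestCase subclass. A minimal harness to run it (class name assumed):

import unittest

class ConfigTest(unittest.TestCase):  # class name assumed
    pass  # test_config from Example #7 would live here

if __name__ == "__main__":
    unittest.main()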
Example #8
    def __init__(self, context, source, testing=False):
        self.logger = logging.getLogger("gc.GhettoClusterSource")
        self.config = config.Config.instance()
        self.context = context
        self.source = source
        self.testing = testing
        self.verbose = self.config.getOption("verbose", "False") == "True"
        self.path = config.path_for(source)
        hostname = config.host_for(source)
        self.states_filename = f"{self.path}/.gc/{hostname}.{context}.json"
        self.states = persistent_dict.PersistentDict(self.states_filename,
                        self.config.getOption("LAZY_WRITE", 5))
Example #9
    def __init__(self, context, replica, testing=False):
        self.logger = logging.getLogger("gc.GhettoClusterReplica")
        self.config = config.Config.instance()
        self.context = context
        self.replica = replica
        self.testing = testing
        self.verbose = self.config.getOption("verbose", "False") == "True"
        self.source = self.config.get_source_for_context(self.context)
        if not self.source.endswith("/"):
            self.source = self.source + "/"
        hostname = config.host_for(replica)
        self.path = config.path_for(replica)
        self.states_filename = f"{self.path}/.gc/{hostname}.{context}.json"
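
These examples split host and path out of the same spec with config.host_for and config.path_for, and the "strip out hostnames" comment in Example #1 implies rsync-style host:/path specs. An illustrative sketch under that assumption:

def host_for(spec):
    # Hypothetical sketch: "host:/path" -> "host"; bare paths have no host
    return spec.split(":", 1)[0] if ":" in spec else ""

def path_for(spec):
    # Hypothetical sketch: "host:/path" -> "/path"; bare paths pass through
    return spec.split(":", 1)[1] if ":" in spec else spec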
Example #10
    def start(self):
        while True:
            self.logger.info("Starting")
            self.config.load()
            sources = self.config.get_sources_for_host(self.hostname)
            self.logger.debug(f"sources: {sources}")
            if len(sources.items()) > 0:
                for context, source in sources.items():
                    self.logger.info(f"{context}: {source}")
                    gcm = GhettoClusterSource(context)
                    gcm.scan()
                    self.get_status(context, source, False)
                self.logger.info("sources are complete.")
            else:
                self.logger.info("No sources for this host")
            replicas = self.config.get_replicas_for_host(self.hostname)
            if len(replicas.items()) > 0:
                for context, replica in replicas.items():
                    self.logger.info(f"{context}: {replica}")
                    source = self.config.get_source_for_context(context)
                    gcs = GhettoClusterReplica(context, replica, source)
                    puller = Thread(target=gcs.pull)
                    self.logger.info("Starting pull thread")
                    puller.start()
                    timer = elapsed.ElapsedTimer()
                    # rescan periodically while the pull is still running
                    while puller.is_alive():
                        if timer.once_every(15):
                            scanner = Thread(target=gcs.scan)
                            self.logger.debug("Starting scan thread")
                            scanner.start()
                            scanner.join()
                            self.logger.debug("Scan thread finished")
                        else:
                            time.sleep(1)  # spin, but not hard
                    gcs.scan()  # one final scan after the pull completes
                self.logger.info("Replicas are complete")
            else:
                self.logger.info("No replicas for this host")
            try:
                signal.signal(signal.SIGHUP, self.wakeup)
                CYCLE = str_to_duration(self.config.getOption("CYCLE", "24h"))
                self.logger.info(f"Sleeping for {duration_to_str(CYCLE)}" + \
                                    f" in PID {os.getpid()}")
                self.logger.debug("send SIGHUP to wake up")
                time.sleep(CYCLE)
            except WakeupException:
                self.logger.warning("Restarting as requested (SIGHUP)")
                signal.signal(signal.SIGHUP, signal.SIG_DFL)
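
start() installs self.wakeup as a SIGHUP handler and catches WakeupException, neither of which is defined in these snippets. A minimal sketch consistent with that usage (both names taken from the code above):

class WakeupException(Exception):
    # Raised from the SIGHUP handler to interrupt time.sleep(CYCLE).
    pass

def wakeup(self, signum, frame):
    # Hypothetical handler: convert the signal into an exception so the
    # sleeping loop in start() begins its next cycle immediately.
    raise WakeupException()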
Example #11
    def __init__(self, context, dest, source):
        super().__init__(context)
        self.dest = dest
        if not source.endswith("/"):
            self.source = source + "/"
        else:
            self.source = source
        hostname = config.host_for(dest)
        self.path = config.path_for(dest)
        self.states_filename = f"{self.path}/.ghetto_cluster/" \
                               f"{hostname}.{context}.json"
        self.states = persistent_dict.PersistentDict(self.states_filename,
                        self.config.getOption("LAZY_WRITE", 5))
        self.testing = False
        self.verbose = self.config.getOption("verbose", "False") == "True"
        self.logger = logging.getLogger(logger_str(__class__))
Example #12
    def __init__(self, context):
        super().__init__()
        self.context = context
        self.config = config.Config.instance()
        self.logger = logging.getLogger(logger_str(__class__) + " " + context)
        self.logger.info(f"Creating clientlet {self.context}")

        self.path = config.path_for(self.config.get(self.context, "backup"))
        assert os.path.exists(self.path), f"{self.path} does not exist!"

        # ALL source contexts (we care a lot)
        self.sources = {}
        self.scanners = {}
        self.random_source_list = []
        self.build_sources()
        self.drops = 0  # count the number of times I drop a file

        self.update_allocation()
        self.bailing = False
        self.sockets = {}
Example #13
    def __init__(self, context):
        super().__init__()
        self.context = context
        self.config = config.Config.instance()
        self.logger = logging.getLogger(logger_str(__class__) + " " + context)
        self.logger.info(f"Creating clientlet {self.context}")

        self.path = config.path_for(self.config.get(self.context, "backup"))
        assert os.path.exists(self.path), f"{self.path} does not exist!"

        # creates per-source scanners, random_source_list, claims
        self.build_sources()

        self.stats = stats.Stats()
        self.bailing = False
        self.datagrams = {}
        self.current_state = "startup"
        self.state_timer = elapsed.ElapsedTimer()
        self.states = {'startup': 0}
        self.efficiency = {}
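
current_state, state_timer, and the states dict above suggest per-state time accounting. A hedged sketch of a transition helper (method name assumed) that banks elapsed seconds into states:

def set_state(self, new_state):
    # Hypothetical helper: accumulate time spent in the old state, then
    # switch and start a fresh timer for the new one.
    self.states[self.current_state] = \
        self.states.get(self.current_state, 0) + self.state_timer.elapsed()
    self.current_state = new_state
    self.state_timer = elapsed.ElapsedTimer()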
Example #14
    def __init__(self, context):
        super().__init__()
        self.context = context

        self.logger = logging.getLogger(utils.logger_str(__class__) \
                        + " " + context)
        # self.logger.setLevel(logging.INFO)
        self.config = config.Config.instance()
        self.copies = int(self.config.get(self.context, "copies", 2))
        self.path = config.path_for(self.config.get(self.context, "source"))
        self.scanner = scanner.Scanner(self.context, self.path)

        # TODO: rename this "clients"
        self.files = dict()  # NOT persistent!  On startup assume nothing
        self.drains = elapsed.ExpiringDict(300)  # NOT persistent!
        self.locks = locker.Locker(5)
        # TODO: timers should relate to a configurable cycle time
        self.bailout = False
        self.stats = {'claims': 0, 'drops': 0}
        self.handling = False
Example #15
    def __init__(self, context):
        super().__init__()
        self.context = context

        logger_str = f"{utils.logger_str(__class__)} {context}"
        self.logger = logging.getLogger(logger_str)
        # self.logger.setLevel(logging.INFO)

        self.config = config.Config.instance()
        self.copies = int(self.config.get(self.context, "copies", 2))
        self.path = config.path_for(self.config.get(self.context, "source"))
        self.scanner = scanner.ScannerLite(self.context, self.path)
        self.rescan = utils.get_interval(self.config, "rescan", self.context)

        lazy_write = self.config.get(context, "LAZY WRITE", 5)
        lazy_write = utils.str_to_duration(lazy_write)
        # self.clients: { filename : { client: expiry_time, } }
        clients_state = f"/tmp/cb.{context}-clients.json.bz2"
        self.clients = PersistentDict(clients_state, lazy_write=lazy_write)
        self.stats = stats.Stats()
        self.handling = False
Example #16
    def rsync_from_list(self, source_context, filename):
        self.logger.debug(f"rsync {source_context}: {filename}")
        verbose = self.config.get("global", "verbose", False)
        dryrun = self.config.get("global", "dryrun", False)
        timer = elapsed.ElapsedTimer()
        (n, size) = self.sizeof(source_context)
        source = self.sources[source_context]
        src_host = config.host_for(source)
        hostname = config.host_for(self.config.get(self.context, "backup"))
        if src_host == hostname:  # a local copy, just use path
            source = config.path_for(source)
        dest = f"{self.paths[source_context]}/"
        filesfrom = f"--files-from={filename}"
        prefix = f"{self.context}:{source_context}"
        rsync_exit = rsync(source,
                           dest, (filesfrom, "-v", "--progress"),
                           prefix=prefix)
        bps = size / timer.elapsed()
        self.logger.debug(
            f"rsync returned {rsync_exit}: {bytes_to_str(bps)}B/s effective")
        return rsync_exit
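
rsync's --files-from flag expects a file listing one path per line, relative to the source. A hedged usage sketch (the clientlet instance, context name, and file list are illustrative):

import tempfile

# Hypothetical usage: write the wanted paths to a list file, then let
# rsync transfer exactly those files from the source context.
wanted = ["photos/2021/a.jpg", "photos/2021/b.jpg"]
with tempfile.NamedTemporaryFile("w", suffix=".list", delete=False) as f:
    f.write("\n".join(wanted) + "\n")
clientlet.rsync_from_list("media", f.name)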